<a href="https://colab.research.google.com/github/KurniaKhaikal/Corel-CBIR/blob/main/VGG16_CBIR_MERGE-testin-almostfix.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [35]:
import numpy as np
import json
import os
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model

In [36]:
from imutils import paths
import cv2
from sklearn.preprocessing import LabelBinarizer

In [37]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [38]:
base_dataset = "/content/drive/MyDrive/Corel-1k/dataset"
class_dir = ['beaches', 'bus', 'dinosaurs',
             'elephants', 'flowers', 'foods',
             'horses', 'monuments', 'mountains_and_snow','peolpe_and_villages_in_Africa'
             ]
IMAGE_SIZE = (224, 224)

In [39]:
# indexing file images
dataset_train = []
for class_item in class_dir:
    cur_dir = os.path.join(base_dataset, 'training_set', class_item)
    for file in os.listdir(cur_dir):
        dataset_train.append(os.path.join(cur_dir, file))

In [98]:
dataset_train

['/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/146.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/110.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/161.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/121.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/152.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/155.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/144.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/164.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/126.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/112.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/174.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/153.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/140.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/training_

In [40]:
print("train:", len(dataset_train))

train: 900


In [41]:
print("[INFO] load images Corel-Image 1k dataset...")
#  load images
train_images = []
for image_path in dataset_train:
    if ".jpg" or ".jpeg" in image_path:
        image = cv2.imread(image_path)
        image = cv2.resize(image, IMAGE_SIZE)
        train_images.append(image)

[INFO] load images Corel-Image 1k dataset...


In [42]:
# normalization
print("[INFO] normalization...")
train_x = np.array(train_images).astype("float32") / 255.0

[INFO] normalization...


In [43]:
from tensorflow.keras.applications.vgg16 import VGG16
modelVGG16 = VGG16(weights="imagenet")
model = Model(inputs=modelVGG16.input, outputs=modelVGG16.get_layer("fc1").output)
model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0   

In [44]:
features = model.predict(train_x)

In [45]:
indexes = list(range(0, train_x.shape[0]))
features_array = [[float(x) for x in y] for y in features]
labels = [path.split("/")[7] for path in dataset_train]
data_train = {"indexes": indexes, "features": features_array, "locations": dataset_train, "labels":labels}

In [46]:
with open('/content/drive/MyDrive/Corel-1k/dataset/feature_extraction1.json', 'w') as f:
    json.dump(data_train, f)

**RETRIEVAL**

In [47]:
import copy
import numpy as np
import os
import json
import cv2
import sklearn.metrics as metric
from tensorflow.keras.models import Model
import pandas as pd

In [48]:
def euclidean(a, b):
	return np.linalg.norm(a - b)

In [49]:
def perform_search(query_features, indexed_train, max_results=12):
	retrieved = []
	for idx in range(0, len(indexed_train["features"])):
		distance = euclidean(query_features, indexed_train["features"][idx])
		retrieved.append((distance, idx))
	retrieved = sorted(retrieved)[:max_results]
	return retrieved

In [50]:
base_dataset = "/content/drive/MyDrive/Corel-1k/dataset"
class_dir = ['beaches', 'bus', 'dinosaurs',
             'elephants', 'flowers', 'foods',
             'horses', 'monuments', 'mountains_and_snow','peolpe_and_villages_in_Africa'
             ]

IMAGE_SIZE = (224, 224)

In [51]:
# indexing file images
dataset_test = []
for class_item in class_dir:
    cur_dir = os.path.join(base_dataset, 'test_set', class_item)
    for file in os.listdir(cur_dir):
        dataset_test.append(os.path.join(cur_dir, file))

In [95]:
dataset_test

['/content/drive/MyDrive/Corel-1k/dataset/test_set/beaches/100.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/test_set/beaches/103.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/test_set/beaches/101.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/test_set/beaches/104.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/test_set/beaches/102.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/test_set/beaches/105.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/test_set/beaches/106.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/test_set/beaches/107.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/test_set/beaches/109.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/test_set/beaches/108.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/test_set/bus/305.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/test_set/bus/301.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/test_set/bus/306.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/test_set/bus/304.jpg',
 '/content/drive/MyDrive/Corel-1k/dataset/test_set

In [52]:
print("len to retrieving:", len(dataset_test))

len to retrieving: 100


In [53]:
print("[INFO] load images Corel-Image 1k dataset...")
#  load images
test_images = []
for image_path in dataset_test:
    if ".jpg" or ".jpeg" in image_path:
        image = cv2.imread(image_path)
        image = cv2.resize(image, IMAGE_SIZE)
        test_images.append(image)

[INFO] load images Corel-Image 1k dataset...


In [54]:
print("[INFO] normalization...")
test_x = np.array(test_images).astype("float32") / 255.0

[INFO] normalization...


In [55]:
from tensorflow.keras.applications import VGG16
modelVGG16 = VGG16(weights="imagenet")
model = Model(inputs=modelVGG16.input, outputs=modelVGG16.get_layer("fc1").output)
model.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0   

In [56]:
features = model.predict(test_x)

In [57]:
with open('/content/drive/MyDrive/Corel-1k/dataset/feature_extraction1.json') as f:
  training_indexed = json.load(f)

In [58]:
query_indexes = list(range(0, test_x.shape[0]))
label_builder = list(np.unique(training_indexed["labels"]))
class_builder = {label_unique:[] for label_unique in label_builder}
recalls = copy.deepcopy(class_builder)
precisions = copy.deepcopy(class_builder)
# loop over the testing indexes
for i in query_indexes:
    queryFeatures = features[i]
    results = perform_search(queryFeatures, training_indexed, max_results=12)
    labels_ret = [training_indexed["labels"][r[1]] for r in results]
    label_true = dataset_test[i].split("/")[7]
    label_trues = [label_true for _ in labels_ret]
    recall = metric.recall_score(label_trues, labels_ret, average='weighted')
    precision = metric.precision_score(label_trues, labels_ret, average='weighted')
    recalls[label_true].append(recall)
    precisions[label_true].append(precision)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

In [59]:
print("recall values:")
comb_recall, comb_precision = [], []
for key in recalls.keys():
    average_val = np.average(recalls[key])
    print(key, average_val)
    comb_recall.append(average_val)
print("combined recall", np.average(comb_recall))

print("\nprecision values:")
for key in precisions.keys():
    average_val = np.average(precisions[key])
    print(key, average_val)
    comb_precision.append(average_val)
print("combined precision", np.average(comb_precision))

recall values:
beaches 0.55
bus 1.0
dinosaurs 1.0
elephants 0.7583333333333333
flowers 1.0
foods 0.4333333333333333
horses 0.825
monuments 0.75
mountains_and_snow 0.775
peolpe_and_villages_in_Africa 0.625
combined recall 0.7716666666666667

precision values:
beaches 0.9
bus 1.0
dinosaurs 1.0
elephants 1.0
flowers 1.0
foods 0.9
horses 1.0
monuments 1.0
mountains_and_snow 1.0
peolpe_and_villages_in_Africa 1.0
combined precision 0.9800000000000001


In [60]:
# Import the libraries
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.preprocessing import image
from PIL import Image

In [103]:
# Import the libraries
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
from pathlib import Path
from PIL import Image
class FeatureExtractor:
    def __init__(self):
        base_model = VGG16(weights='imagenet')
        self.model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)
    def extract(self, img):
        img = img.resize((224, 224))
        img = img.convert('RGB')
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        feature = self.model.predict(x)[0]
        return feature / np.linalg.norm(feature)


fe = FeatureExtractor()
for img_path in sorted(dataset_train):
    print(img_path)
    feature = fe.extract(img=Image.open(img_path))
    feature_path = "/content/drive/MyDrive/Corel-1k/dataset.npy"
    np.save(feature_path, feature)

/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/110.jpg
/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/111.jpg
/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/112.jpg
/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/113.jpg
/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/114.jpg
/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/115.jpg
/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/116.jpg
/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/117.jpg
/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/118.jpg
/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/119.jpg
/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/120.jpg
/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/121.jpg
/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/122.jpg
/content/drive/MyDrive/Corel-1k/dataset/training_set/beaches/123.jpg
/content/drive/MyDrive/Corel-1k/da

In [133]:
# Import the libraries
import matplotlib.pyplot as plt
import numpy as np
# Insert the image query
img = Image.open("/content/drive/MyDrive/Corel-1k/dataset/test_set/beaches/100.jpg")
# Extract its features
query = fe.extract(img)
# Calculate the similarity (distance) between images
dists = np.linalg.norm(features - query, axis=1)
# Extract 30 images that have lowest distance
ids = np.argsort(dists)[:12]
scores = [(dists[id], img_path[id]) for id in ids]
# Visualize the result
axes=[]
fig=plt.figure(figsize=(8,8))
for a in range(5*6):
    score = scores[a]
    axes.append(fig.add_subplot(5, 6, a+1))
    subplot_title=str(score[0])
    axes[-1].set_title(subplot_title)  
    plt.axis('off')
    plt.imshow(Image.open(score[1]))
fig.tight_layout()
plt.show()

IndexError: ignored