<a href="https://colab.research.google.com/github/KurniaKhaikal/Corel-CBIR/blob/main/VGG16_CBIR_DoneFix2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import json
import os
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model

In [2]:
from imutils import paths
import cv2
from sklearn.preprocessing import LabelBinarizer

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
base_dataset = "/content/drive/MyDrive/Corel-1k/dataset"
class_dir = ['beaches', 'bus', 'dinosaurs',
             'elephants', 'flowers', 'foods',
             'horses', 'monuments', 'mountains_and_snow','peolpe_and_villages_in_Africa'
             ]
IMAGE_SIZE = (224, 224)

In [8]:
# indexing file images
dataset_train = []
for class_item in class_dir:
    cur_dir = os.path.join(base_dataset, 'training_set', class_item)
    for file in os.listdir(cur_dir):
        dataset_train.append(os.path.join(cur_dir, file))

In [9]:
print("train:", len(dataset_train))

train: 800


In [10]:
print("[INFO] load images Corel-Image 1k dataset...")
#  load images
train_images = []
for image_path in dataset_train:
    if ".jpg" or ".jpeg" in image_path:
        image = cv2.imread(image_path)
        image = cv2.resize(image, IMAGE_SIZE)
        train_images.append(image)

[INFO] load images Corel-Image 1k dataset...


In [11]:
# normalization
print("[INFO] normalization...")
train_x = np.array(train_images).astype("float32") / 255.0

[INFO] normalization...


In [12]:
from tensorflow.keras.applications.vgg16 import VGG16
modelVGG16 = VGG16(weights="imagenet")
model = Model(inputs=modelVGG16.input, outputs=modelVGG16.get_layer("fc1").output)
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     14758

In [13]:
features = model.predict(train_x)



In [14]:
indexes = list(range(0, train_x.shape[0]))
features_array = [[float(x) for x in y] for y in features]
labels = [path.split("/")[7] for path in dataset_train]
data_train = {"indexes": indexes, "features": features_array, "locations": dataset_train, "labels":labels}

In [16]:
with open('/content/drive/MyDrive/Corel-1k/feature_extraction.json', 'w') as f:
    json.dump(data_train, f)

**RETRIEVAL**

In [17]:
import copy
import numpy as np
import os
import json
import cv2
import sklearn.metrics as metric
from tensorflow.keras.models import Model
import pandas as pd

In [18]:
def euclidean(a, b):
	return np.linalg.norm(a - b)

In [19]:
def perform_search(query_features, indexed_train, max_results=5):
	retrieved = []
	for idx in range(0, len(indexed_train["features"])):
		distance = euclidean(query_features, indexed_train["features"][idx])
		retrieved.append((distance, idx))
	retrieved = sorted(retrieved)[:max_results]
	return retrieved

In [20]:
base_dataset = "/content/drive/MyDrive/Corel-1k/dataset"
class_dir = ['beaches', 'bus', 'dinosaurs',
             'elephants', 'flowers', 'foods',
             'horses', 'monuments', 'mountains_and_snow','peolpe_and_villages_in_Africa'
             ]

IMAGE_SIZE = (224, 224)

In [21]:
# indexing file images
dataset_test = []
for class_item in class_dir:
    cur_dir = os.path.join(base_dataset, 'test_set', class_item)
    for file in os.listdir(cur_dir):
        dataset_test.append(os.path.join(cur_dir, file))

In [22]:
print("len to retrieving:", len(dataset_test))

len to retrieving: 200


In [23]:
print("[INFO] load images Corel-Image 1k dataset...")
#  load images
test_images = []
for image_path in dataset_test:
    if ".jpg" or ".jpeg" in image_path:
        image = cv2.imread(image_path)
        image = cv2.resize(image, IMAGE_SIZE)
        test_images.append(image)

[INFO] load images Corel-Image 1k dataset...


In [24]:
print("[INFO] normalization...")
test_x = np.array(test_images).astype("float32") / 255.0

[INFO] normalization...


In [25]:
from tensorflow.keras.applications import VGG16
modelVGG16 = VGG16(weights="imagenet")
model = Model(inputs=modelVGG16.input, outputs=modelVGG16.get_layer("fc1").output)
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0   

In [26]:
features = model.predict(test_x)



In [27]:
with open('/content/drive/MyDrive/Corel-1k/feature_extraction.json') as f:
  training_indexed = json.load(f)

In [28]:
query_indexes = list(range(0, test_x.shape[0]))
label_builder = list(np.unique(training_indexed["labels"]))
class_builder = {label_unique:[] for label_unique in label_builder}
recalls = copy.deepcopy(class_builder)
precisions = copy.deepcopy(class_builder)
# loop over the testing indexes
for i in query_indexes:
    queryFeatures = features[i]
    results = perform_search(queryFeatures, training_indexed, max_results=5)
    labels_ret = [training_indexed["labels"][r[1]] for r in results]
    label_true = dataset_test[i].split("/")[7]
    label_trues = [label_true for _ in labels_ret]
    recall = metric.recall_score(label_trues, labels_ret, average='weighted')
    precision = metric.precision_score(label_trues, labels_ret, average='weighted')
    recalls[label_true].append(recall)
    precisions[label_true].append(precision)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

In [29]:
print("recall values:")
comb_recall, comb_precision = [], []
for key in recalls.keys():
    average_val = np.average(recalls[key])
    print(key, average_val)
    comb_recall.append(average_val)
print("combined recall", np.average(comb_recall))

print("\nprecision values:")
for key in precisions.keys():
    average_val = np.average(precisions[key])
    print(key, average_val)
    comb_precision.append(average_val)
print("combined precision", np.average(comb_precision))

recall values:
beaches 0.5700000000000001
bus 0.99
dinosaurs 1.0
elephants 0.8300000000000001
flowers 1.0
foods 0.72
horses 0.8700000000000001
monuments 0.78
mountains_and_snow 0.8
peolpe_and_villages_in_Africa 0.64
combined recall 0.82

precision values:
beaches 0.85
bus 1.0
dinosaurs 1.0
elephants 1.0
flowers 1.0
foods 0.95
horses 0.95
monuments 0.9
mountains_and_snow 1.0
peolpe_and_villages_in_Africa 0.9
combined precision 0.9550000000000001
