<a href="https://colab.research.google.com/github/EdoardoMorucci/Plant-Leaves-Search-Engine---MIRCV/blob/main/feature_extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Connection to GDrive and download of all the datasets

In [1]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [None]:
! pip install -q kaggle

from google.colab import files
_ = files.upload()

! mkdir -p ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json


In [None]:
! kaggle datasets download -d davidedemarco/healthy-unhealthy-plants-dataset-segmented --unzip


Downloading healthy-unhealthy-plants-dataset-segmented.zip to /content
100% 640M/642M [00:21<00:00, 46.6MB/s]
100% 642M/642M [00:21<00:00, 31.5MB/s]


In [None]:
# downaload the noise images from https://drive.google.com/file/d/1wsrC7u104hsAU6Wo0cII33TLC3oLpTkm/view?usp=sharing

! gdown --id 1wsrC7u104hsAU6Wo0cII33TLC3oLpTkm -O noise.zip

Downloading...
From: https://drive.google.com/uc?id=1wsrC7u104hsAU6Wo0cII33TLC3oLpTkm
To: /content/noise.zip
100% 3.08G/3.08G [00:56<00:00, 54.5MB/s]


In [None]:
! unzip -qq /content/noise.zip

## Imports

In [8]:
from os import listdir
from os.path import isfile, join
import pandas as pd
from tqdm.auto import tqdm

from IPython.display import display
from ipywidgets import HBox, Image, HTML

import tensorflow as tf
from tensorflow import keras

import numpy as np

from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_similarity

# Creation of DataFrames for noise images and leaves images

In [None]:
directories = ["Apple", "Blueberry", "Cherry", "Corn", "Grape", "Orange", "Peach", "Pepper", "Potato", "Raspberry", "Soybean", "Squash", "Strawberry", "Tomato"]
leaves_dataframe = pd.DataFrame(columns=['Image', 'Plant', 'Path'])
for plant in tqdm(directories):
  leaf_path = '/content/Healthy-Unhealthy-Plants-Dataset-Segmented/' + plant
  image_paths = listdir(leaf_path)
  for path in tqdm(image_paths):
    leaves_dataframe = leaves_dataframe.append({'Image': path, 'Plant': plant, 'Path': leaf_path}, ignore_index=True)



  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/4159 [00:00<?, ?it/s]

  0%|          | 0/1969 [00:00<?, ?it/s]

  0%|          | 0/2489 [00:00<?, ?it/s]

  0%|          | 0/5058 [00:00<?, ?it/s]

  0%|          | 0/5521 [00:00<?, ?it/s]

  0%|          | 0/7255 [00:00<?, ?it/s]

  0%|          | 0/3634 [00:00<?, ?it/s]

  0%|          | 0/3240 [00:00<?, ?it/s]

  0%|          | 0/2868 [00:00<?, ?it/s]

  0%|          | 0/635 [00:00<?, ?it/s]

  0%|          | 0/6706 [00:00<?, ?it/s]

  0%|          | 0/2409 [00:00<?, ?it/s]

  0%|          | 0/2238 [00:00<?, ?it/s]

  0%|          | 0/23853 [00:00<?, ?it/s]

In [None]:
leaves_dataframe



Unnamed: 0,Image,Plant,Path
0,6643566f-d980-4bdb-88d7-4d3ab3c771fa___FREC_Sc...,Apple,/content/Healthy-Unhealthy-Plants-Dataset-Segm...
1,753a02e9-3624-4692-a0c9-ccb78c8f916b___RS_HL 5...,Apple,/content/Healthy-Unhealthy-Plants-Dataset-Segm...
2,4c368acb-0a47-4c7f-84bd-29dc74660db9___RS_HL 5...,Apple,/content/Healthy-Unhealthy-Plants-Dataset-Segm...
3,99a971b6-cd13-4ed2-8fc5-0d6769945578___FREC_Sc...,Apple,/content/Healthy-Unhealthy-Plants-Dataset-Segm...
4,15f31b47-61da-4549-ad60-f13ef13a6512___JR_FrgE...,Apple,/content/Healthy-Unhealthy-Plants-Dataset-Segm...
...,...,...,...
72029,749e717c-f4d4-44ec-9265-1cb37aae66ca___UF.GRC_...,Tomato,/content/Healthy-Unhealthy-Plants-Dataset-Segm...
72030,93e37751-6fc2-41fb-b0fd-9639cba54c98___UF.GRC_...,Tomato,/content/Healthy-Unhealthy-Plants-Dataset-Segm...
72031,2a67b51d-1ad6-4ab9-8a1d-db599f8593f3___YLCV_NR...,Tomato,/content/Healthy-Unhealthy-Plants-Dataset-Segm...
72032,7620a603-c3cd-4893-9481-901c9950bb00___GH_HL L...,Tomato,/content/Healthy-Unhealthy-Plants-Dataset-Segm...


In [None]:
noise_path = "/content/mirflickr25k/mirflickr"
noise_images = [f for f in listdir(noise_path) if isfile(join(noise_path, f))]


In [None]:
noise_dataframe = pd.DataFrame(columns=['Image', 'Plant', 'Path'])

for image_path in tqdm(noise_images):
  noise_dataframe = noise_dataframe.append({'Image': image_path, 'Plant': "noise", 'Path': noise_path}, ignore_index=True)

  0%|          | 0/25000 [00:00<?, ?it/s]

In [None]:
noise_dataframe

Unnamed: 0,Image,Plant,Path
0,im23788.jpg,noise,/content/mirflickr25k/mirflickr
1,im321.jpg,noise,/content/mirflickr25k/mirflickr
2,im10948.jpg,noise,/content/mirflickr25k/mirflickr
3,im1177.jpg,noise,/content/mirflickr25k/mirflickr
4,im16324.jpg,noise,/content/mirflickr25k/mirflickr
...,...,...,...
24995,im8266.jpg,noise,/content/mirflickr25k/mirflickr
24996,im19378.jpg,noise,/content/mirflickr25k/mirflickr
24997,im14944.jpg,noise,/content/mirflickr25k/mirflickr
24998,im7647.jpg,noise,/content/mirflickr25k/mirflickr


In [None]:
frames = [leaves_dataframe, noise_dataframe]

total_dataframe = pd.concat(frames)

total_dataframe

Unnamed: 0,Image,Plant,Path
0,00348.jpg,Apple,/content/Healthy-Unhealthy-Plants-Dataset-Segm...
1,f415bc3e-3e71-4636-a3dd-78b65002384d___JR_FrgE...,Apple,/content/Healthy-Unhealthy-Plants-Dataset-Segm...
2,22717.jpg,Apple,/content/Healthy-Unhealthy-Plants-Dataset-Segm...
3,4dc8e2a8-4374-488a-afc6-fd0ea7f15c88___FREC_C....,Apple,/content/Healthy-Unhealthy-Plants-Dataset-Segm...
4,4b186022-c0ab-4a26-9930-c7944a3c431e___RS_HL 7...,Apple,/content/Healthy-Unhealthy-Plants-Dataset-Segm...
...,...,...,...
24995,im8266.jpg,noise,/content/mirflickr25k/mirflickr
24996,im19378.jpg,noise,/content/mirflickr25k/mirflickr
24997,im14944.jpg,noise,/content/mirflickr25k/mirflickr
24998,im7647.jpg,noise,/content/mirflickr25k/mirflickr


# Feature Extraction

## Normal Model

In [None]:
model = tf.keras.applications.densenet.DenseNet121(
    weights='imagenet', input_tensor=None, input_shape=(224,224,3), 
    pooling='avg', include_top=False
)

model.summary()

In [None]:
noise_paths = noise_dataframe.Path + '/' + noise_dataframe.Image


noise_features = []

image_width = 224
image_height = 224
image_size = (image_height, image_width)

for path in tqdm(noise_paths):
  image_pil = tf.keras.utils.load_img(path, target_size=image_size, interpolation='bilinear')
  image_np = tf.keras.preprocessing.image.img_to_array(image_pil)
  image_np = np.expand_dims(image_np, axis=0)  # add batch dimension
  image_feature_vector = model(image_np).numpy()
  noise_features.append(image_feature_vector)

noise_features = np.concatenate(noise_features, axis=0)

np.savez("noise_features_final.npz", noise_features)

  0%|          | 0/25000 [00:00<?, ?it/s]

In [None]:
image_paths = "/content/Healthy-Unhealthy-Plants-Dataset-Segmented/" + leaves_dataframe.Plant + '/' + leaves_dataframe.Image


features = []

image_width = 224
image_height = 224
image_size = (image_height, image_width)

for path in tqdm(image_paths):
  image_pil = tf.keras.utils.load_img(path, target_size=image_size, interpolation='bilinear')
  image_np = tf.keras.preprocessing.image.img_to_array(image_pil)
  image_np = np.expand_dims(image_np, axis=0)  # add batch dimension
  image_feature_vector = model(image_np).numpy()
  features.append(image_feature_vector)

features = np.concatenate(features, axis=0)
np.savez("features_final.npz", features)

  0%|          | 0/72034 [00:00<?, ?it/s]

Concatenate the two features vector and save it to drive

In [None]:
# feature https://drive.google.com/file/d/1MClSLlPJ4iAuYugiXuPa8z3MEDwR79SK/view?usp=sharing
# noise feature https://drive.google.com/file/d/1_T3vAVOgSMp9UxUhYuIp28AnDIngQO9d/view?usp=sharing
! gdown --id 1MClSLlPJ4iAuYugiXuPa8z3MEDwR79SK

a = np.load('features_final.npz')

features = a['arr_0']

!gdown --id 1_T3vAVOgSMp9UxUhYuIp28AnDIngQO9d

b = np.load('noise_features_final.npz')

noise_features = b['arr_0']


Downloading...
From: https://drive.google.com/uc?id=1MClSLlPJ4iAuYugiXuPa8z3MEDwR79SK
To: /content/features_final.npz
100% 295M/295M [00:02<00:00, 137MB/s]
Downloading...
From: https://drive.google.com/uc?id=1_T3vAVOgSMp9UxUhYuIp28AnDIngQO9d
To: /content/noise_features_final.npz
100% 102M/102M [00:00<00:00, 143MB/s]  


In [None]:
total_features = np.concatenate((features, noise_features), axis=0)
np.save("total_features.npy", total_features)


## Finetuned Model

In [None]:
finetuned_model = keras.models.load_model('/content/gdrive/Shareddrives/MIRCV-PlantLeavesSearchEngine/model_fine_tuned')
fixed_finetuned_model = keras.models.Model(inputs=finetuned_model.input, outputs=finetuned_model.get_layer('gap').output)
fixed_finetuned_model.summary()

In [None]:
noise_paths = noise_dataframe.Path + '/' + noise_dataframe.Image


noise_features = []

image_width = 224
image_height = 224
image_size = (image_height, image_width)

for path in tqdm(noise_paths):
  image_pil = tf.keras.utils.load_img(path, target_size=image_size, interpolation='bilinear')
  image_np = tf.keras.preprocessing.image.img_to_array(image_pil)
  image_np = np.expand_dims(image_np, axis=0)  # add batch dimension
  image_feature_vector = fixed_finetuned_model(image_np).numpy()
  noise_features.append(image_feature_vector)

noise_features = np.concatenate(noise_features, axis=0)

np.savez("noise_features_finetuned.npz", noise_features)

In [None]:
image_paths = "/content/Healthy-Unhealthy-Plants-Dataset-Segmented/" + leaves_dataframe.Plant + '/' + leaves_dataframe.Image


features = []

image_width = 224
image_height = 224
image_size = (image_height, image_width)

for path in tqdm(image_paths):
  image_pil = tf.keras.utils.load_img(path, target_size=image_size, interpolation='bilinear')
  image_np = tf.keras.preprocessing.image.img_to_array(image_pil)
  image_np = np.expand_dims(image_np, axis=0)  # add batch dimension
  image_feature_vector = fixed_finetuned_model(image_np).numpy()
  features.append(image_feature_vector)

features = np.concatenate(features, axis=0)
np.savez("features_fine_tuned.npz", features)

Concatenate the two features vector and save it to drive

In [3]:
# Finetuned feature https://drive.google.com/file/d/154NTeCkSUPE1tlI6Z-mM8BZ9om5hnzMp/view?usp=sharing
# Finetuned noise feature https://drive.google.com/file/d/1om-VfqmNK1-5v6M06C0LSOOWzUpB2n4G/view?usp=sharing
! gdown --id 154NTeCkSUPE1tlI6Z-mM8BZ9om5hnzMp

a = np.load('features_fine_tuned.npz')

features = a['arr_0']

!gdown --id 1om-VfqmNK1-5v6M06C0LSOOWzUpB2n4G

b = np.load('noise_features_finetuned.npz')

noise_features = b['arr_0']

Downloading...
From: https://drive.google.com/uc?id=154NTeCkSUPE1tlI6Z-mM8BZ9om5hnzMp
To: /content/features_fine_tuned.npz
100% 295M/295M [00:06<00:00, 44.5MB/s]
Downloading...
From: https://drive.google.com/uc?id=1om-VfqmNK1-5v6M06C0LSOOWzUpB2n4G
To: /content/noise_features_finetuned.npz
100% 102M/102M [00:01<00:00, 67.5MB/s] 


In [4]:
total_features = np.concatenate((features, noise_features), axis=0)

np.save("finetuned_features.npy", total_features)

In [None]:
def extract_features(query_path, model):
  model = model
  query_image_pil = tf.keras.utils.load_img(query_url, target_size=image_size, interpolation='bilinear')
  query_image_np = tf.keras.preprocessing.image.img_to_array(query_image_pil)
  query_image_np = np.expand_dims(query_image_np, axis=0)  # add batch dimension
  query_feature = model(query_image_np)
  return query_feature