# Install Packages

In [None]:
# !pip install mtcnn 

In [None]:
### Install this package if it is not already installed; it is required to detect the target faces

# Check GPU and Runtime

### Check whether you are using a high-RAM runtime and a GPU (Colab Pro). This is not needed when running on a local machine. Note: a local machine might run out of RAM.

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Fri Apr 15 06:28:32 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
from psutil import virtual_memory

# Total memory reported by psutil, converted from bytes to decimal gigabytes.
ram_gb = virtual_memory().total / 1e9
print(f'Your runtime has {ram_gb:.1f} gigabytes of available RAM\n')

# 20 GB is the cut-off this notebook uses to tell a high-RAM runtime apart.
print('You are using a high-RAM runtime!' if ram_gb >= 20 else 'Not using a high-RAM runtime')

Your runtime has 27.3 gigabytes of available RAM

You are using a high-RAM runtime!


# Mount Drive

#### Mount Drive to access the dataset, embedding outputs, etc.

In [None]:
# Mount Google Drive at /content/drive; every data/model path below lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# FaceNet - Model Initialize

### FaceNet Weights can be downloaded here: https://drive.google.com/drive/folders/1I4dCNIwct9WAgzfPpV3wCwTIWzscPnuL?usp=sharing

In [None]:
import tensorflow as tf
from tensorflow.keras import models
from keras.models import Model

# read the model
# Locations (on the mounted Drive) of the serialized FaceNet Keras model and of its separate weights file.
model_path = "/content/drive/My Drive/reverse-visual-search/0004/FaceNet/model/facenet_keras.h5"
weight_path = "/content/drive/My Drive/reverse-visual-search/0004/FaceNet/weights/facenet_keras_weights.h5"

# compile=False: the model is only used for inference here, so no optimizer/loss state is restored.
model = models.load_model(model_path, compile=False)

# Load the weights from the dedicated weights file into the model.
# NOTE(review): `models` comes from tensorflow.keras while `Model` is imported from standalone
# keras in this cell; presumably both resolve to the same implementation in this runtime -- confirm.
model.load_weights(weight_path)

In [None]:
# Print the layer-by-layer architecture of the loaded model.
model.summary()

Model: "inception_resnet_v1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 160, 160, 3  0           []                               
                                )]                                                                
                                                                                                  
 Conv2d_1a_3x3 (Conv2D)         (None, 79, 79, 32)   864         ['input_1[0][0]']                
                                                                                                  
 Conv2d_1a_3x3_BatchNorm (Batch  (None, 79, 79, 32)  96          ['Conv2d_1a_3x3[0][0]']          
 Normalization)                                                                                   
                                                                                

### Taking the Avg Pooling Layer Output as the Embedding

In [None]:
# Build a sub-model that shares the loaded network's input but stops at the
# layer named 'AvgPool'; its activations are used as the face embedding.
layer_name = 'AvgPool'
facenet = Model(
    inputs=model.input,
    outputs=model.get_layer(name=layer_name).output,
)

# Read Dataset

#### Download the input from these folders:
#### 1. Folder 0001: https://drive.google.com/drive/folders/1CtLdASAT8FpboPUgcW-PXiFcO2hxKA5U?usp=sharing
#### 2. Folder 0004: https://drive.google.com/drive/folders/1M4_SrEPMRo9PSNLWzPHPHN0MpPD4uzjB?usp=sharing

### This code can also be run on the train/test split images; however, it takes some time to run, so we recommend running it on a local machine. As this is outside the scope of this project, we have provided the output of this step, which can be downloaded above. Download the folders and place the '0001' and '0004' folders under the reverse-visual-search folder in your Drive.

### In this step we read the dataset produced by the previous step and prepare it for MTCNN face detection. Instead of reading the dataset from scratch, you can run the cell marked ""RUN HERE"" to skip the data-reading process.

In [None]:
from matplotlib import image
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random

In [None]:
# Read the train/test split tables and the label table from the '0001' folder,
# using the first CSV column as the index of each frame.
split_dir = "/content/drive/My Drive/reverse-visual-search/0001"
train, test, labels = (
    pd.read_csv(os.path.join(split_dir, f"{table}.csv"), index_col=0)
    for table in ("train", "test", "labels")
)

In [None]:
# X_train = list()
# for name in train["Name"]:
#     directory = name.split("0")[0][: -1]
#     X_train.append(image.imread(os.path.join("/content/drive/My Drive/reverse-visual-search/LFW", directory, name)))
# X_train = np.array(X_train)

In [None]:
# X_test = list()
# for name in test["Name"]:
#     directory = name.split("0")[0][: -1]
#     X_test.append(image.imread(os.path.join("/content/drive/My Drive/reverse-visual-search/LFW", directory, name)))
# X_test = np.array(X_test)

In [None]:
# Copy the "Label" column of each split into a NumPy array of targets.
y_train, y_test = (np.array(frame["Label"]) for frame in (train, test))

In [None]:
# Copy the "image" column of each split into a NumPy array.
train_images, test_images = (np.array(frame["image"]) for frame in (train, test))

In [None]:
# X_train_reshape.shape

In [None]:
# X_test_reshape.shape

In [None]:
# Number of training labels.
y_train.shape

(10586,)

In [None]:
# Number of test labels.
y_test.shape

(2647,)

In [None]:
# Number of entries in the training "image" column; should match y_train.
train_images.shape

(10586,)

In [None]:
# Number of entries in the test "image" column; should match y_test.
test_images.shape

(2647,)

In [None]:
# np.save(os.path.join("0001",'X_train'), X_train)
# np.save(os.path.join("0001",'X_test'), X_test)

In [None]:
### ""RUN HERE"" ###
# Load the stored face arrays for both splits from the '0001' folder.
faces_dir = "/content/drive/My Drive/reverse-visual-search/0001"
X_train, X_test = (
    np.load(os.path.join(faces_dir, f"{split}_faces.npy"))
    for split in ("train", "test")
)

In [None]:
# Shape of the loaded training face array.
X_train.shape

(10586, 250, 250, 3)

In [None]:
# Shape of the loaded test face array.
X_test.shape

(2647, 250, 250, 3)

# Preprocessing for FACENET 

### In this step we take the dataset from the previous step and prepare it for FaceNet by reshaping and preprocessing. Instead of preprocessing the dataset from scratch, you can run the cell marked ""RUN HERE"" to skip the preprocessing.

## Reshaping

In [None]:
from PIL import Image


# Reshaping in 160 x 160 x 3
# Every training face is resized to 160x160, the spatial input size of the FaceNet model.
# The per-face variable is deliberately not called `image`, so the `matplotlib.image`
# module imported earlier is no longer overwritten. Progress is reported once at the
# end instead of once per image, which flooded (and truncated) the cell output.
X_train_reshape = np.array([
    np.asarray(Image.fromarray(face).resize((160, 160)))
    for face in X_train
])
print(f'Resized {X_train_reshape.shape[0]} training images')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Resized 5587
Resized 5588
Resized 5589
Resized 5590
Resized 5591
Resized 5592
Resized 5593
Resized 5594
Resized 5595
Resized 5596
Resized 5597
Resized 5598
Resized 5599
Resized 5600
Resized 5601
Resized 5602
Resized 5603
Resized 5604
Resized 5605
Resized 5606
Resized 5607
Resized 5608
Resized 5609
Resized 5610
Resized 5611
Resized 5612
Resized 5613
Resized 5614
Resized 5615
Resized 5616
Resized 5617
Resized 5618
Resized 5619
Resized 5620
Resized 5621
Resized 5622
Resized 5623
Resized 5624
Resized 5625
Resized 5626
Resized 5627
Resized 5628
Resized 5629
Resized 5630
Resized 5631
Resized 5632
Resized 5633
Resized 5634
Resized 5635
Resized 5636
Resized 5637
Resized 5638
Resized 5639
Resized 5640
Resized 5641
Resized 5642
Resized 5643
Resized 5644
Resized 5645
Resized 5646
Resized 5647
Resized 5648
Resized 5649
Resized 5650
Resized 5651
Resized 5652
Resized 5653
Resized 5654
Resized 5655
Resized 5656
Resized 5657
Resized 5658

In [None]:
# Reshaping in 160 x 160 x 3
# Every test face is resized to 160x160, the spatial input size of the FaceNet model.
# A comprehension is used so no module-level `image` variable is created, which
# previously overwrote the `matplotlib.image` module imported earlier.
X_test_reshape = np.array([
    np.asarray(Image.fromarray(face).resize((160, 160)))
    for face in X_test
])

In [None]:
# Shape of the resized training faces.
X_train_reshape.shape

(10586, 160, 160, 3)

In [None]:
# Shape of the resized test faces.
X_test_reshape.shape

(2647, 160, 160, 3)

## Normalization

In [None]:
def preprocess(face):
    """Standardize one face image to zero mean and unit variance.

    The statistics are computed over the whole array (all pixels and channels
    together), not per channel.

    Args:
        face: NumPy array of pixel values, any numeric dtype.

    Returns:
        A float32 array with the same shape as `face`. A constant image
        (zero standard deviation) yields all zeros instead of NaNs, and an
        empty array is returned unchanged apart from the float32 cast.
    """
    # cast to float32 so the arithmetic is not carried out in an integer dtype
    face = face.astype('float32')
    if face.size == 0:
        return face
    # standardization
    mean = face.mean()
    # Floor the standard deviation at 1/sqrt(N), as FaceNet's reference
    # prewhitening does, so a constant image cannot cause a division by zero.
    # A plain Python float keeps the result in float32.
    std = max(float(face.std()), 1.0 / face.size ** 0.5)
    return (face - mean) / std

In [None]:
# Standardize every resized training face with `preprocess`.
# The per-face variable is no longer called `train`, which used to overwrite the
# `train` DataFrame loaded from train.csv. Progress is reported once at the end
# instead of once per image, which flooded (and truncated) the cell output.
train_preprocess = np.array([preprocess(face) for face in X_train_reshape])
print(f'Preprocessed {train_preprocess.shape[0]} training images')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
pre process 5587
pre process 5588
pre process 5589
pre process 5590
pre process 5591
pre process 5592
pre process 5593
pre process 5594
pre process 5595
pre process 5596
pre process 5597
pre process 5598
pre process 5599
pre process 5600
pre process 5601
pre process 5602
pre process 5603
pre process 5604
pre process 5605
pre process 5606
pre process 5607
pre process 5608
pre process 5609
pre process 5610
pre process 5611
pre process 5612
pre process 5613
pre process 5614
pre process 5615
pre process 5616
pre process 5617
pre process 5618
pre process 5619
pre process 5620
pre process 5621
pre process 5622
pre process 5623
pre process 5624
pre process 5625
pre process 5626
pre process 5627
pre process 5628
pre process 5629
pre process 5630
pre process 5631
pre process 5632
pre process 5633
pre process 5634
pre process 5635
pre process 5636
pre process 5637
pre process 5638
pre process 5639
pre process 5640
pre process 5641


In [None]:
# Standardize every resized test face with `preprocess`.
# A comprehension is used so the loop no longer binds a module-level `test` variable,
# which used to overwrite the `test` DataFrame loaded from test.csv.
test_preprocess = np.array([preprocess(face) for face in X_test_reshape])

In [None]:
# Shape of the standardized training faces.
train_preprocess.shape

(10586, 160, 160, 3)

In [None]:
# Shape of the standardized test faces.
test_preprocess.shape

(2647, 160, 160, 3)

In [None]:
# Persist both standardized arrays in the '0004' folder so the preprocessing
# can be skipped on later runs; np.save adds the ".npy" extension itself.
preprocess_dir = "/content/drive/My Drive/reverse-visual-search/0004"
for stem, faces in (("train_preprocess", train_preprocess), ("test_preprocess", test_preprocess)):
    np.save(os.path.join(preprocess_dir, stem), faces)

In [None]:
### ""RUN HERE"" ###
# Reload the standardized arrays from the '0004' folder. Despite the `_reshape`
# names, these variables now hold the preprocessed data that is fed to FaceNet.
preprocess_dir = "/content/drive/My Drive/reverse-visual-search/0004"
X_train_reshape, X_test_reshape = (
    np.load(os.path.join(preprocess_dir, f"{split}_preprocess.npy"))
    for split in ("train", "test")
)

# Features

### In this step we extract the embeddings from FaceNet. The FaceNet model was loaded in the section above.

In [None]:
# Run the truncated FaceNet model over the training faces to get one embedding vector per image.
X_train_features = facenet.predict(X_train_reshape)

In [None]:
# (number of training images, embedding length)
X_train_features.shape

(10586, 1792)

In [None]:
# Run the truncated FaceNet model over the test faces to get one embedding vector per image.
X_test_features = facenet.predict(X_test_reshape)

In [None]:
# (number of test images, embedding length)
X_test_features.shape

(2647, 1792)

In [None]:
# np.savetxt(os.path.join("/content/drive/My Drive/reverse-visual-search/0004", "X_train_features.txt"), X_train_features)
# np.savetxt(os.path.join("/content/drive/My Drive/reverse-visual-search/0004", "X_test_features.txt"), X_test_features)
# np.savetxt(os.path.join("/content/drive/My Drive/reverse-visual-search/0004", "y_train.txt"), y_train)
# np.savetxt(os.path.join("/content/drive/My Drive/reverse-visual-search/0004", "y_test.txt"), y_test)
# np.savetxt(os.path.join("/content/drive/My Drive/reverse-visual-search/0004", "train_images.txt"), train_images)
# np.savetxt(os.path.join("/content/drive/My Drive/reverse-visual-search/0004", "test_images.txt"), test_images)

In [None]:
# Reload the embeddings written by the (commented-out) np.savetxt cell above.
# They are read from the same Drive folder that cell writes to; the previous relative
# "0004" directory only resolved when the working directory happened to contain it.
# NOTE(review): this replaces the embeddings just computed by facenet.predict with the
# stored copy -- skip this cell if the stored files are missing or stale.
features_dir = "/content/drive/My Drive/reverse-visual-search/0004"
X_train_features = np.loadtxt(os.path.join(features_dir, 'X_train_features.txt'))
X_test_features = np.loadtxt(os.path.join(features_dir, 'X_test_features.txt'))

# KNN

### In this step we quickly get results from KNN to see whether FaceNet had a positive influence

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

In [None]:
# k-nearest-neighbours classifier that votes among the 3 closest training embeddings.
neigh = KNeighborsClassifier(n_neighbors=3)

In [None]:
# Fit the classifier on the training embeddings and their labels.
neigh.fit(X_train_features, y_train)

KNeighborsClassifier(n_neighbors=3)

In [None]:
# Predict a label for every test embedding.
y_predictions = neigh.predict(X_test_features)

In [None]:
# Fraction of test images whose predicted label equals the true label.
accuracy_score(y_test, y_predictions)

0.5481677370608236

### We can clearly see that FaceNet is better than ResNet-50. Now let's work on Milvus.