In [None]:
### perform YOLO v3 object detection in OpenCV:
### Download the “yolov3.weights”, and “yolov3.cfg” files from the following website. 
# https://pjreddie.com/darknet/yolo/
### YOLO v3 object detection OpenCV code.

import cv2
import numpy as np

## load YOLO3 weights and cfg file
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")

## load the class values (one label per line)
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]
#print(classes)

## get the names of the network's output layers
# getUnconnectedOutLayers() yields 1-based layer indices. Depending on the
# OpenCV build they arrive as a flat array or as an Nx1 array, so flatten
# before indexing to support both.
layer_names = net.getLayerNames()
outputlayers = [layer_names[int(i) - 1]
                for i in np.array(net.getUnconnectedOutLayers()).flatten()]

# one random drawing colour per class
colors = np.random.uniform(0, 255, size=(len(classes), 3))

#loading image or video
cap = cv2.VideoCapture("dog.mp4")  #0 for 1st webcam
if not cap.isOpened():
    # Fail early: otherwise the loop below exits at once and leaves an empty yolo.mp4.
    raise IOError("Could not open video source 'dog.mp4'")
font = cv2.FONT_HERSHEY_SIMPLEX

# Get the width and height of the video frames
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Reuse the source frame rate so the output plays at the original speed;
# fall back to 30 when the backend does not report one (0 or NaN).
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = 30

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
vids = cv2.VideoWriter('yolo.mp4', fourcc, fps, (width, height))

# Process the video frame by frame: detect, filter with NMS, draw, write, show.
# try/finally guarantees the capture, writer and window are released even if
# a frame fails mid-way.
try:
    while True:
        ret, frame = cap.read()  # read the frame
        if not ret:
            break  # end of stream or read failure

        height, width = frame.shape[:2]

        # Pre-process the frame into a network input blob: scale pixels by
        # 0.00392 (~1/255), resize to 320x320, no mean subtraction, swap R/B.
        blob = cv2.dnn.blobFromImage(frame, 0.00392, (320, 320), (0, 0, 0), True, crop=False)  #reduce 416 to 320
        net.setInput(blob)
        outs = net.forward(outputlayers)

        # Collect every detection whose best class score exceeds 0.3
        class_ids = []
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > 0.3:
                    # detection[0:4] are multiplied by the frame size below
                    # to obtain the box centre and size in pixels
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)

                    # top-left corner of the rectangle
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)

                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # Non-maximum suppression: 0.4 is the score threshold, 0.6 the NMS
        # (overlap) threshold.
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.4, 0.6)
        # NMSBoxes may return (), a flat array or an Nx1 array; normalise to
        # a sorted list of ints so boxes are drawn in their original order.
        kept = sorted(int(i) for i in np.array(indexes).flatten())

        # draw the surviving boxes and their labels on the frame
        for i in kept:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            confidence = confidences[i]
            color = colors[class_ids[i]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv2.putText(frame, label + " " + str(round(confidence, 2)), (x, y + 30), font, 1, (255, 255, 255), 2)

        #writing the frame
        vids.write(frame)
        cv2.imshow("Video", frame)
        # wait 25 ms for a key press; Q stops the process
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    vids.release()
    cv2.destroyAllWindows()


In [None]:
# Perform YOLO v5 object detection on a custom dataset (the YOLOv5 code used below runs on PyTorch):
# 1.Go to Google Colab and create a new notebook.
# https://colab.research.google.com/ 
# 2.Change the Runtime type to GPU as a hardware accelerator by following the steps.
# Click Runtime → Change Runtime type → select GPU in hardware accelerator dropdown → click Save.
# 3.Clone the yolov5 repository by using the following code.
# ! git clone https://github.com/ultralytics/yolov5.git
# 4.Install YOLO v5 Dependencies in the Colab notebook.

! pip install -U -r requirements.txt

import torch
import os
from iPython.display import Image, clear_output
# from utils.google_utils import gdrive_download

clear_output()

%cd /content/yolov5/

# 5.Download the custom (fish images) data set that you are interested in (using the Roboflow api).

# https://public.roboflow.com/

!curl -L "https://public.roboflow.com/ds/lujgbDXgkE?key=CgA5u2f1oB" > roboflow.zip; unzip roboflow.zip; rm roboflow.zip


# Create the custom model configuration file

#extracting information from the roboflow file
%cat /content/yolov5/data.yaml


# 6.Define the number of classes based on data.yaml.

import yaml
with open("data.yaml", 'r') as stream:
    num_classes = str(yaml.safe_load(stream)['nc'])

%cat /content/yolov5/models/yolov5s.yaml


# 7.Customize iPython write file so we can write variables.

from IPython.core.magic import register_line_cell_magic

@register_line_cell_magic
def writetemplate(line, cell=None):
    """Write the cell body to a file after substituting notebook variables.

    Use as a cell magic: ``%%writetemplate <path>``. The cell text is passed
    through ``str.format`` with the notebook globals, so ``{name}`` is replaced
    by the value of the variable ``name``. Literal braces in the template must
    be doubled (``{{`` / ``}}``).

    Args:
        line: Destination file path (the text following the magic name).
        cell: Template text; ``None`` when invoked as a line magic.

    Raises:
        ValueError: If invoked without a cell body.
    """
    if cell is None:
        # A line-magic call carries no template; fail with a clear message
        # instead of a TypeError about a missing argument.
        raise ValueError("writetemplate needs a cell body: use %%writetemplate <path>")
    with open(line.strip(), 'w') as f:
        f.write(cell.format(**globals()))

%%writetemplate /content/yolov5/models/custom_yolov5s.yaml

# Model definition written to models/custom_yolov5s.yaml by the writetemplate magic.
# The nc placeholder below is filled from the notebook variable num_classes.
# Do not use curly braces in these comments: the whole cell goes through str.format.

# parameters
nc: {num_classes}  # number of classes
depth_multiple: 0.33  # model depth multiple
width_multiple: 0.50  # layer channel multiple

# anchors
# three groups of three pairs, one group per detection scale (P3, P4, P5)
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# YOLOv5 backbone
# each row is one layer: [from, number, module, args]; a from value of -1 presumably means the previous layer (TODO confirm)
backbone:
  # [from, number, module, args]
  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, BottleneckCSP, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 9, BottleneckCSP, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, BottleneckCSP, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]],
   [-1, 3, BottleneckCSP, [1024, False]],  # 9
  ]

# YOLOv5 head
# Concat rows take two source layers; the final Detect row consumes layers 17, 20 and 23
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 3, BottleneckCSP, [512, False]],  # 13

   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],
   [[-1, 14], 1, Concat, [1]],  # cat head P4
   [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)

   [-1, 1, Conv, [512, 3, 2]],
   [[-1, 10], 1, Concat, [1]],  # cat head P5
   [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)

   [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]



# 8.Train yolov5 on the custom images using the custom configuration file.

# train yolov5s on custom data for 100 epochs
# time its performance
%%time
# NOTE: %%time is a cell magic and must be the first line of its own notebook cell.
# Run from the repository root so the relative paths below resolve.
%cd /content/yolov5/
# Flags as written: 416-px images, batch size 16, 100 epochs, dataset from
# ./data.yaml, architecture from ./models/custom_yolov5s.yaml, run name
# yolov5s_results, image caching on. The empty --weights string presumably
# means training from scratch — TODO confirm against train.py.
!python train.py --img 416 --batch 16 --epochs 100 --data ./data.yaml --cfg ./models/custom_yolov5s.yaml --weights '' --name yolov5s_results  --cache

# 9.Run yolov5 detection on images.

# run yolov5 detection on images.

# copy the location of the weights file and replace it in the following code 

!python detect.py --weights /content/yolov5/runs/train/yolov5s_results/weights/best.pt --img 416 --conf 0.4 --source ./test/images


import glob
from IPython.display import Image, display

for imageName in glob.glob('/content/yolov5/runs/detect/exp3/*.jpg'):
    display(Image(filename=imageName))
    print("\n")
    
    
# copy the location of the weights file and replace it in the following code 

!python detect.py --weights runs/train/yolov5s_results4/weights/best.pt --img 416 --conf 0.5 --source ../aquarium.mp4



In [None]:
### Refer to the following code for the output of the face recognition code.

# NOTE(review): the star import is where InceptionResNetV2 comes from; prefer
# an explicit import once the contents of architecture.py are confirmed.
from architecture import *
import os
import cv2
import mtcnn
import pickle
import numpy as np
from sklearn.preprocessing import Normalizer
# load_model lives under tensorflow.keras.models; "tensorflow.models" is not a valid module path
from tensorflow.keras.models import load_model



# Setup for building the face-encoding database
face_data = 'Faces/'            # root folder of the face images
required_shape = (160,160)      # image size used with the encoder
face_encoder = InceptionResNetV2()
path = "/Face recognition/facenet_keras.h5"
face_encoder.load_weights(path)
face_detector = mtcnn.MTCNN()
encodes = []
encoding_dict = dict()
l2_normalizer = Normalizer('l2')

import cv2 
import numpy as np
import mtcnn
from architecture import *
from sklearn.preprocessing import Normalizer
from scipy.spatial.distance import cosine
from keras.models import load_model
import pickle



# 7.Train the dataset using the train.py file and test the code using the face_reg.py file using the following code.
# L2 normalizer applied to each face embedding inside detect(); this re-creates
# the identical object already built earlier in this cell.
l2_normalizer = Normalizer('l2')

def normalize(img):
    """Standardize an image to zero mean and unit variance.

    Args:
        img: Array-like image (anything exposing ``.mean()`` and ``.std()``).

    Returns:
        ``(img - mean) / std``. For a constant image (``std == 0``) the
        centred image (all zeros) is returned instead of dividing by zero,
        which would fill the result with NaN/inf.
    """
    mean, std = img.mean(), img.std()
    if std == 0:
        return img - mean
    return (img - mean) / std

# Detections whose detector confidence is below this value are skipped in detect().
confidence_t=0.99
# Maximum cosine distance at which a stored encoding counts as a match in detect().
recognition_t=0.5
# Size the face crop is resized to before it is passed to the encoder.
required_size = (160,160)

def get_face(img, box):
    """Crop a bounding box out of an image, clipped at the top/left edge.

    Args:
        img: Image array indexed as ``img[row, col]``.
        box: ``(x, y, width, height)`` of the face; ``x``/``y`` may be
            negative when the detector places the box partly off-image.

    Returns:
        ``(face, (x1, y1), (x2, y2))``: the cropped region and its top-left
        and bottom-right corners.
    """
    x1, y1, width, height = box
    # Compute the far corner from the unclipped origin, then clamp to the
    # image. Taking abs() of a negative origin would shift the whole crop
    # instead of clipping it.
    x2, y2 = x1 + width, y1 + height
    x1, y1 = max(x1, 0), max(y1, 0)
    x2, y2 = max(x2, 0), max(y2, 0)
    face = img[y1:y2, x1:x2]
    return face, (x1, y1), (x2, y2)

def get_encode(face_encoder, face, size):
    """Return the encoder output for a single face crop.

    The crop is standardized with ``normalize``, resized to ``size`` and
    wrapped in a batch of one before ``face_encoder.predict`` is called.

    Args:
        face_encoder: Object exposing ``predict(batch)``.
        face: Face image array.
        size: Target size handed to ``cv2.resize``.

    Returns:
        The first (only) row of the prediction.
    """
    standardized = normalize(face)
    resized = cv2.resize(standardized, size)
    batch = np.expand_dims(resized, axis=0)
    return face_encoder.predict(batch)[0]


def load_pickle(path):
    """Load and return the object stored in the pickle file at ``path``.

    NOTE: unpickling can execute arbitrary code; only load files you trust.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)

def detect(img, detector, encoder, encoding_dict):
    """Detect faces in a BGR frame, label each one and draw the result.

    For every detection with confidence of at least ``confidence_t`` the face
    is cropped, encoded, L2-normalized and compared (cosine distance) with
    every entry of ``encoding_dict``. The closest entry below
    ``recognition_t`` gives the name; otherwise the face is 'unknown'.

    Args:
        img: BGR frame; it is drawn on in place.
        detector: Object exposing ``detect_faces(rgb_image)`` that returns
            dicts with ``'confidence'`` and ``'box'`` keys.
        encoder: Face encoder passed on to ``get_encode``.
        encoding_dict: Mapping of name -> reference encoding.

    Returns:
        The annotated ``img``.
    """
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = detector.detect_faces(img_rgb)
    for res in results:
        if res['confidence'] < confidence_t:
            continue
        face, pt_1, pt_2 = get_face(img_rgb, res['box'])
        if face.size == 0:
            # A degenerate or off-image box gives an empty crop, which would
            # make cv2.resize raise and abort the whole video loop.
            continue
        encode = get_encode(encoder, face, required_size)
        encode = l2_normalizer.transform(encode.reshape(1, -1))[0]
        name = 'unknown'

        # keep the closest stored encoding that is within the threshold
        distance = float("inf")
        for db_name, db_encode in encoding_dict.items():
            dist = cosine(db_encode, encode)
            if dist < recognition_t and dist < distance:
                name = db_name
                distance = dist

        if name == 'unknown':
            # red box and red label for unrecognized faces
            cv2.rectangle(img, pt_1, pt_2, (0, 0, 255), 2)
            cv2.putText(img, name, pt_1, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 1)
        else:
            # green box, name drawn just above the box
            cv2.rectangle(img, pt_1, pt_2, (0, 255, 0), 2)
            cv2.putText(img, name, (pt_1[0], pt_1[1] - 5), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 0, 255), 2)
    return img



if __name__ == "__main__":
    # Build the encoder and detector, load the stored encodings, then
    # annotate the video frame by frame until it ends or Q is pressed.
    required_shape = (160,160)
    face_encoder = InceptionResNetV2()
    path_m = "facenet_keras.h5"
    face_encoder.load_weights(path_m)
    encodings_path = 'encodings.pkl'
    face_detector = mtcnn.MTCNN()
    encoding_dict = load_pickle(encodings_path)

    cap = cv2.VideoCapture("videoplayback.mp4")
    if not cap.isOpened():
        print("Could not open video source")

    try:
        while cap.isOpened():
            ret, frame = cap.read()

            if not ret:
                # also reached normally when the video ends
                print("No frame received (end of video or read failure)")
                break

            frame = detect(frame, face_detector, face_encoder, encoding_dict)

            cv2.imshow("frame", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # always free the capture and close the preview window
        cap.release()
        cv2.destroyAllWindows()




In [None]:
### Real-time age prediction using TensorFlow with images and OpenCV:

# 1.Download the “age-model.h5” files from the following website. 
# https://www.kaggle.com/datasets/mugeshraja/real-time-age-prediction
# 2.Load the “age-model.h5” files and run the following code and load the  ‘haarcascade_frontalface_default.xml’ file. 


import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model

# Load the pre-trained age model from 'age-model.h5'
# (described as ResNet-50 in the original note; the architecture is not visible here — TODO confirm)
model = load_model('age-model.h5')
# Load the Haar Cascade classifier for face detection
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')


def predict_age(image):
    """Predict an age from an image file.

    Args:
        image: Path of the image file to read.

    Returns:
        The first model output for the image, rounded with ``round``.

    Raises:
        FileNotFoundError: If the file is missing or cannot be decoded.
    """
    # cv2.imread returns None instead of raising when the file cannot be read
    pixels = cv2.imread(image)
    if pixels is None:
        raise FileNotFoundError("Could not read image: {}".format(image))

    # Resize to 224x224 and add a batch dimension of one
    pixels = cv2.resize(pixels, (224, 224))
    batch = np.expand_dims(pixels, axis=0)

    # Run the model and round its first output
    age = round(model.predict(batch)[0][0])
    return age


predict_age("image.jpg")



# Here's an example of how to perform Real-time age prediction using TensorFlow with videos and OpenCV:

import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model

# Load the pre-trained age model from 'age-model.h5' (same file as loaded earlier in this cell;
# described as ResNet-50 in the original note — architecture not visible here, TODO confirm)
model = load_model('age-model.h5')
# Load the Haar Cascade classifier for face detection
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

# Define a function to predict the age of a person from an image
def predict_age(image):
    """Predict an age from an image array (for example a video frame).

    NOTE: this definition replaces the earlier ``predict_age`` in this cell,
    which took a file path instead of an array.

    Args:
        image: Image array accepted by ``cv2.resize``.

    Returns:
        The first model output for the image, rounded with ``round``.
    """
    # Resize to 224x224 and wrap in a batch of one
    batch = np.expand_dims(cv2.resize(image, (224, 224)), axis=0)

    # Run the model on the batch and round its first output
    prediction = model.predict(batch)
    return round(prediction[0][0])

# Define a function that reads frames from a video file and predicts the age of the person in each frame
def real_time_age_prediction():
    """Play "videoplayback.mp4" with a predicted age and face boxes drawn on it.

    For each frame the age is predicted from the whole frame and written at
    the top-left; faces found by the Haar cascade are outlined in green.
    Stops when the video ends or Q is pressed.
    """
    cap = cv2.VideoCapture("videoplayback.mp4")
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # End of video or read failure: frame is None here, and
                # passing it on would crash in cv2.resize.
                break

            # Predict the age of the person in the frame
            age = predict_age(frame)

            # Display the age on the frame
            cv2.putText(frame, str(age), (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            # Convert the image to grayscale for face detection
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Detect faces in the grayscale image using the Haar Cascade classifier
            faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)

            # Draw rectangles around the detected faces
            for (x, y, w, h) in faces:
                cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)

            # Display the resulting frame
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and destroy the window even if a frame failed
        cap.release()
        cv2.destroyAllWindows()

# Call the function for real-time age prediction
real_time_age_prediction()


In [None]:
# Train a facial emotion recognition model using TensorFlow (it is tested with OpenCV in the next cell):
# 1.Download the emotion dataset files from the following website. 
# https://www.kaggle.com/datasets/jonathanoheix/face-expression-recognition-dataset

    
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten,BatchNormalization
from tensorflow.keras.layers import Conv2D,MaxPooling2D
import os

print(tf.__version__)

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

## assign the image size and class
num_classes = 5  # ['Angry','Happy','Neutral','Sad','Surprise']
img_rows,img_cols = 48,48
batch_size = 32

## give the path to training and validation datasets
# os.path.join keeps the paths portable; hard-coded backslashes only work on Windows.
train_data_dir = os.path.join('Emotion Dataset', 'fer2013', 'train')
validation_data_dir = os.path.join('Emotion Dataset', 'fer2013', 'validation')

## data generators
# Training images are rescaled to [0, 1] and randomly augmented
# (rotation, shear, zoom, shifts, horizontal flip).
train_datagen = ImageDataGenerator(rescale=1./255,rotation_range=30,shear_range=0.3,
                                   zoom_range=0.3,width_shift_range=0.4,
                                   height_shift_range=0.4,horizontal_flip=True,fill_mode='nearest')

# Validation images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale=1./255)

# Both generators read class sub-folders as grayscale 48x48 images and
# yield one-hot ('categorical') labels.
train_generator = train_datagen.flow_from_directory(train_data_dir,color_mode='grayscale',
                                                    target_size=(img_rows,img_cols),batch_size=batch_size,
                                                    class_mode='categorical',shuffle=True)

validation_generator = validation_datagen.flow_from_directory(validation_data_dir,color_mode='grayscale',
                                                    target_size=(img_rows,img_cols),batch_size=batch_size,
                                                    class_mode='categorical',shuffle=True)

## build the model
def _add_conv_block(net, filters, input_shape=None):
    """Append two Conv(3x3)-ELU-BatchNorm stages, a 2x2 max-pool and 20% dropout."""
    for stage in range(2):
        conv_kwargs = dict(padding='same', kernel_initializer='he_normal')
        # only the very first layer of the network declares the input shape
        if stage == 0 and input_shape is not None:
            conv_kwargs['input_shape'] = input_shape
        net.add(Conv2D(filters, (3, 3), **conv_kwargs))
        net.add(Activation('elu'))
        net.add(BatchNormalization())
    net.add(MaxPooling2D(pool_size=(2, 2)))
    net.add(Dropout(0.2))


def _add_dense_block(net, units):
    """Append Dense-ELU-BatchNorm followed by 50% dropout."""
    net.add(Dense(units, kernel_initializer='he_normal'))
    net.add(Activation('elu'))
    net.add(BatchNormalization())
    net.add(Dropout(0.5))


# Four convolutional blocks (32, 64, 128, 256 filters), two 64-unit dense
# blocks and a softmax output with one unit per emotion class.
model = Sequential()

_add_conv_block(model, 32, input_shape=(img_rows, img_cols, 1))
_add_conv_block(model, 64)
_add_conv_block(model, 128)
_add_conv_block(model, 256)

model.add(Flatten())
_add_dense_block(model, 64)
_add_dense_block(model, 64)

model.add(Dense(num_classes, kernel_initializer='he_normal'))
model.add(Activation('softmax'))

# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line.
model.summary()



## Training callbacks
# Save the model to 'Emotion_model.h5' whenever val_loss reaches a new minimum.
checkpoint = ModelCheckpoint('Emotion_model.h5', monitor='val_loss', mode='min',
                             save_best_only=True, verbose=1)

# Stop after 3 epochs without val_loss improvement and restore the best weights.
earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=3,
                          verbose=1, restore_best_weights=True)

# Multiply the learning rate by 0.2 after 3 epochs without val_loss improvement.
# NOTE(review): this patience equals the early-stopping patience, so training
# may stop at the same epoch the rate would be reduced — confirm this is intended.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3,
                              verbose=1, min_delta=0.0001)

callbacks = [earlystop, checkpoint, reduce_lr]

## Compile the CNN Model
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by recent Keras versions.
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(learning_rate=0.001),
              metrics=['accuracy'])


# Take the sample counts from the generators instead of hard-coding them, so
# the steps stay correct if the dataset on disk changes.
nb_train_samples = train_generator.samples
nb_validation_samples = validation_generator.samples
epochs=25


# Model.fit accepts generators directly; fit_generator is deprecated and
# removed in recent releases.
history=model.fit(
                train_generator,
                steps_per_epoch=nb_train_samples//batch_size,
                epochs=epochs,
                callbacks=callbacks,
                validation_data=validation_generator,
                validation_steps=nb_validation_samples//batch_size)

## Plot the Train and Validation Accuracy_Loss
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
     

def plot_learningCurve(history):
    """Plot training and validation accuracy, then loss, per epoch.

    Args:
        history: Object whose ``history`` attribute maps 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' to per-epoch value lists
            (as returned by ``model.fit``).
    """
    metrics = history.history
    # Derive the x-axis from the epochs actually run: early stopping makes
    # the count unpredictable, and a fixed range raises a shape mismatch.
    epoch_range = range(1, len(metrics['accuracy']) + 1)

    # Plot training & validation accuracy values
    plt.plot(epoch_range, metrics['accuracy'])
    plt.plot(epoch_range, metrics['val_accuracy'])
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Val'], loc='upper left')
    plt.show()

    # Plot training & validation loss values
    plt.plot(epoch_range, metrics['loss'])
    plt.plot(epoch_range, metrics['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Val'], loc='upper left')
    plt.show()


plot_learningCurve(history)


In [None]:
### Download the ‘haarcascade_frontalface_default.xml’ file and test the emotion model using the following code.


import cv2
import numpy as np

print(cv2.__version__)

import datetime
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing import image
# Haar cascade for face detection and the emotion classifier trained above
face_classifier = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
classifier =load_model('Emotion_model.h5')

# index i of the classifier output is looked up in this list
class_labels = ['Angry','Happy','Neutral','Sad','Surprise']


# open the first webcam
cap = cv2.VideoCapture(0)

print(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
print(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# The writer's frame size must match the frames written to it, otherwise the
# output file ends up empty. Use the size the camera reports and fall back
# to 640x480 when it reports nothing.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) or 640
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 480

# Define the codec and create VideoWriter object
fourcc =cv2.VideoWriter_fourcc(*'MJPG')
out =cv2.VideoWriter('output.avi', fourcc, 20.0 ,(frame_width, frame_height))


# Annotate webcam frames with a timestamp and the predicted emotion of every
# detected face; show and record each frame until Q is pressed.
try:
    while True:
        # Grab a single frame of video
        ret, frame = cap.read()
        if not ret:
            # camera unavailable or stream ended: frame is None here
            break

        gray = cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)
        faces = face_classifier.detectMultiScale(gray,1.3,5)

        font = cv2.FONT_HERSHEY_SIMPLEX

        # stamp the current date and time on the frame
        datet = str(datetime.datetime.now())
        cv2.putText(frame , datet, (10, 50),font, 1 , (0,255,255) , 2, cv2.LINE_AA)

        for (x,y,w,h) in faces:
            cv2.rectangle(frame,(x,y),(x+w,y+h),(255,0,0),2)
            # the classifier works on 48x48 grayscale face crops
            roi_gray = gray[y:y+h,x:x+w]
            roi_gray = cv2.resize(roi_gray,(48,48),interpolation=cv2.INTER_AREA)

            if np.sum([roi_gray])!=0:
                # scale to [0, 1] and shape as a batch of one
                roi = roi_gray.astype('float')/255.0
                roi = img_to_array(roi)
                roi = np.expand_dims(roi,axis=0)

                # make a prediction on the ROI, then lookup the class
                preds = classifier.predict(roi)[0]
                label=class_labels[preds.argmax()]
                label_position = (x,y)
                cv2.putText(frame,label,label_position,font,2,(0,255,0),3)
            else:
                # an all-black crop is treated as "no face"
                cv2.putText(frame,'No Face Found',(20,60),font,2,(0,255,0),3)

        cv2.imshow('Emotion Detector',frame)

        out.write(frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # always free the camera, finalize the video file and close the window
    cap.release()
    out.release()
    cv2.destroyAllWindows()


In [None]:
### Run the following code to perform CBIR. 
from zipfile import ZipFile
import cv2

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
%matplotlib inline
from glob import glob
import cv2, os
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.preprocessing import image


# 6. Unzip the file.
file_name = "/dataset.zip"

# Extract the archive into the current working directory.
# The handle is named `archive` so it does not shadow the builtin zip().
# NOTE(review): later code reads images from '/dataset' (absolute); confirm
# that the working directory and archive layout actually produce that path.
with ZipFile(file_name, 'r') as archive:
    archive.extractall()
    print('done')


# display the image
from google.colab.patches import cv2_imshow

# Read one sample exactly as stored (IMREAD_UNCHANGED is the flag value -1) and show it inline.
img = cv2.imread('/dataset/1001.jpg', cv2.IMREAD_UNCHANGED)
cv2_imshow(img)


## Load the images.
path = '/dataset'

img_dataset = []
def load_img():
    """Load every readable image under ``path`` into ``img_dataset``.

    Each image is converted to grayscale, resized to 512x512 and turned into
    an array with ``image.img_to_array`` before being appended.
    """
    img_path = os.path.join(path, "*")
    # Sort the matches: glob order is filesystem-dependent, and the
    # train/test split and query index below depend on this order.
    for im in sorted(glob(img_path)):
        img  = cv2.imread(im)
        if img is None:
            # not a readable image (e.g. a stray file or sub-folder); skip
            # it instead of crashing in cvtColor
            continue
        data = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        data = cv2.resize(data, (512, 512))
        data = image.img_to_array(data)
        img_dataset.append(data)
    print('All images loaded')
load_img()
# All images loaded
print('Number of the Images: {}'.format(len(img_dataset)))


## display the random images 
import random

# Show five randomly chosen dataset images (drawn with replacement) side by side.
plt.figure(figsize=(12,8))

for slot in range(1, 6):
    plt.subplot(1, 5, slot)
    sample = image.array_to_img(random.choice(img_dataset))
    plt.imshow(sample)
    plt.axis('off')

plt.show()

### Split the data.
# First 95% of the images go to training, the rest to testing.
end = round(len(img_dataset) * 0.95)

X_train, X_test = img_dataset[:end], img_dataset[end:]
print(len(X_train), len(X_test))

## Normalize the data: convert to arrays and divide by 255
X_train = np.asarray(X_train) / 255
X_test = np.asarray(X_test) / 255

## Reshape the data to have 1 channel
print(X_train.shape, X_test.shape)

X_train = X_train.reshape((-1, 512, 512, 1))
X_test = X_test.reshape((-1, 512, 512, 1))
print(X_train.shape, X_test.shape)



### Create the autoencoder.
input_img = Input(shape=(512,512,1))

# Encoder: five Conv(3x3, relu) + 2x2 max-pool stages with 32, 16, 8, 4 and 2
# filters. The last pooling layer is named 'encoder' so it can be looked up later.
x = input_img
for n_filters in (32, 16, 8, 4):
    x = Conv2D(n_filters, (3,3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2,2), padding='same')(x)
x = Conv2D(2, (3,3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2,2), padding='same', name='encoder')(x)

# Decoder: mirror of the encoder, Conv(3x3, relu) + 2x2 upsampling with
# 2, 4, 8, 16 and 32 filters, then one sigmoid channel as the reconstruction.
x = encoded
for n_filters in (2, 4, 8, 16, 32):
    x = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(x)
    x = UpSampling2D((2, 2))(x)
decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)

# Train the reconstruction with Adam on mean squared error.
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.summary()


### Train and save the model.
# Fit the autoencoder to reproduce its own input (4 epochs, batches of 16, no
# callbacks); the trailing semicolon hides the returned History repr in the notebook.
autoencoder.fit(
    X_train,
    X_train,
    epochs=4,
    batch_size=16,
);
# Persist the trained network in HDF5 format.
autoencoder.save('autoencoder.h5')

# Create the encoder part
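# The encoder part is the first half of the autoencoder, i.e. the part that encodes the input into a latent space representation. In this case, the dimension of this representation is 16 x 16 x 2 (512 values per image): five 2x2 poolings reduce 512 to 16, and the last convolution has 2 filters.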

# Reload the trained autoencoder from the file saved above. A saved model is
# restored with load_model(); Model('<path>') does not load anything.
autoencoder = load_model('autoencoder.h5')
# The encoder is the sub-network from the input up to the layer named 'encoder'.
encoder = Model(inputs=autoencoder.input, outputs=autoencoder.get_layer('encoder').output)
encoder.save('encoder.h5')
# Round-trip through disk to check the saved encoder can be restored.
encoder = load_model('encoder.h5')

# Load the query image
# We take a query image from the test set

# Take test image 8 as the query and preview it.
query = X_test[8]
plt.imshow(query.reshape(512,512), cmap='gray');

# Encode the test images and the query image
X_test.shape
# Drop the query from the search set so it cannot be returned as its own match.
# NOTE: this overwrites X_test, so re-running this step removes another image.

X_test = np.delete(X_test, 8, axis=0)
X_test.shape


# Encode the search set and the query (the query as a batch of one)
codes = encoder.predict(X_test)
query_code = encoder.predict(query.reshape(1,512,512,1))
codes.shape

query_code.shape

# a = number of values in one encoding (product of its dimensions)
a = 1
for dim in query_code[0].shape:
    a *= dim
print(a)


### Find the closest images.
# We will find the 9 closest images
from sklearn.neighbors import NearestNeighbors


n_neigh = 9

# Flatten every encoding to one row of length a
codes = codes.reshape(-1, a)
print(codes.shape)
query_code = query_code.reshape(1, a)
print(query_code.shape)

# Fit the KNN to the test set (fit returns the estimator itself)
nbrs = NearestNeighbors(n_neighbors=n_neigh)
nbrs = nbrs.fit(codes)
distances, indices = nbrs.kneighbors(np.array(query_code))

# Look up the matching test images and restore the image shape
closest_images = X_test[indices].reshape(-1,512,512,1)
print(closest_images.shape)


### Get the closest images.
# Show the query image first ...
plt.imshow(query.reshape(512,512), cmap='gray');

# ... then its nearest neighbours in one row, without axis ticks.
plt.figure(figsize=(20, 6))
for position in range(1, n_neigh + 1):
    ax = plt.subplot(1, n_neigh, position)
    plt.imshow(closest_images[position - 1].reshape(512, 512))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

plt.show()
