# Image recognition with Python, OpenCV, OpenAI CLIP model and PostgreSQL `pgvector` 

This repository contains the working code for the example in the [blog post](https://aiven.io/developer/find-faces-with-pgvector)

The overall flow is shown below:

![Overall flow](entire_flow.jpg)

## Step 0: Install requirements

In [None]:
!pip install -r requirements.txt
!pip install ipyplot

## Step 1: Face recognition

Detect the faces in the pictures under the `train/muffin` and `train/chihuahua` folders and store the cropped faces under the `stored-faces` folder

In [None]:
# importing the cv2 library
import cv2
import os

# file name of the Haar cascade model used for frontal face detection
alg = "haarcascade_frontalface_default.xml"
# passing the model file to OpenCV
haar_cascade = cv2.CascadeClassifier(alg)
# a cascade file that cannot be loaded may silently yield an empty classifier,
# which only fails later inside detectMultiScale; fail early with a clear message
if haar_cascade.empty():
    raise FileNotFoundError("Unable to load the Haar cascade file: " + alg)

def detect_faces(folder):
    """Detect faces in every picture of ``train/<folder>`` and save the crops.

    Each detected face is cropped from the grayscale picture and written to
    ``stored-faces/<folder><file_name><index>.jpg``, where ``index`` counts the
    faces found in that picture, starting from 0.

    Args:
        folder: Name of the sub-folder of ``train/`` to scan.
    """
    source_dir = "train/" + folder
    # cv2.imwrite does not create missing directories, so make sure the target exists
    os.makedirs("stored-faces", exist_ok=True)

    for file_name in os.listdir(source_dir):
        # flag 0 loads the picture directly as a single-channel grayscale image,
        # so no further colour conversion is needed (COLOR_RGB2BGR expects a
        # 3- or 4-channel input and is not a grayscale conversion anyway)
        img = cv2.imread(source_dir + "/" + file_name, 0)
        if img is None:
            # cv2.imread returns None for files it cannot decode: skip them
            continue

        # detecting the faces
        faces = haar_cascade.detectMultiScale(
            img, scaleFactor=1.05, minNeighbors=2, minSize=(100, 100)
        )

        # for each face detected
        for i, (x, y, w, h) in enumerate(faces):
            # crop the image to select only the face
            cropped_image = img[y : y + h, x : x + w]
            # one output file per face: <folder><original file name><face index>.jpg
            target_file_name = 'stored-faces/' + folder + file_name + str(i) + '.jpg'
            cv2.imwrite(target_file_name, cropped_image)

# run the face detection over both training categories, in the same order
for category in ("muffin", "chihuahua"):
    detect_faces(category)



## Step 2: Embeddings Calculation

Calculate the embeddings of the stored faces and push them to PostgreSQL. You'll need to replace the `<PG_URI>` placeholder with your PostgreSQL Service URI

In [None]:
# importing the required libraries
import numpy as np
from imgbeddings import imgbeddings
from PIL import Image
import psycopg2
import os

# connecting to the database - replace <PG_URI> with the PostgreSQL service URI
conn = psycopg2.connect("<PG_URI>")

# loading the `imgbeddings` model once, instead of once per picture
ibed = imgbeddings()

try:
    # `with conn` commits the transaction on success and rolls it back on error;
    # the cursor context manager closes the cursor when the block ends
    with conn, conn.cursor() as cur:
        for filename in os.listdir("stored-faces"):
            # opening the image (and releasing the file handle once embedded)
            with Image.open("stored-faces/" + filename) as img:
                # calculating the embeddings
                embedding = ibed.to_embeddings(img)
            # one row per picture: file name + embedding vector as a list
            cur.execute("INSERT INTO pictures values (%s,%s)", (filename, embedding[0].tolist()))
            print(filename)
finally:
    # the connection context manager does not close the connection itself
    conn.close()

## Step 3: Calculate embeddings on a new picture

Find the face and calculate the embeddings on the picture `solo-image.png` used for research

In [None]:
import cv2
from PIL import Image
# loading the face image path into file_name variable

# opening the image
def calculate_embeddings(file_name):
    """Calculate the `imgbeddings` embedding of a picture.

    The whole picture is embedded. Face detection is not performed here: the
    cropping of the largest face was already disabled (its result was
    discarded), and the remaining detection call applied ``cv2.COLOR_RGB2BGR``
    to an image loaded as single-channel grayscale, so that dead code has been
    removed.

    Args:
        file_name: Path of the picture to embed.

    Returns:
        The value returned by ``imgbeddings().to_embeddings`` for the picture;
        the caller in this notebook reads the vector from index 0.
    """
    # loading the `imgbeddings`
    ibed = imgbeddings()

    # opening the image; the context manager releases the file handle afterwards
    with Image.open(file_name) as img:
        # calculating the embeddings
        return ibed.to_embeddings(img)

## Step 4: Find similar images by querying the PostgreSQL database using pgvector

In [None]:
from IPython.display import Image as Img, display
import ipyplot

# picture used for the search
filename = "test/4-mix.png"
embedding = calculate_embeddings(filename)

# connecting to the database - replace the placeholder with the PostgreSQL service URI
conn = psycopg2.connect("<PG-URI")
try:
    # text form of the vector passed to the query: "[v1,v2,...]"
    string_representation = "[" + ",".join(str(x) for x in embedding[0].tolist()) + "]"
    # the cursor context manager closes the cursor even if the query fails
    with conn.cursor() as cur:
        # the 30 stored pictures ordered by the `<->` distance to the query vector
        cur.execute("SELECT * FROM pictures ORDER BY embedding <-> %s LIMIT 30;", (string_representation,))
        rows = cur.fetchall()
finally:
    # release the connection once the rows have been fetched
    conn.close()

# show the query picture first, then the matches
display(Img(filename=filename, width="100px"))

# the first column of each row is used as the stored file name
images = ["stored-faces/" + row[0] for row in rows]
ipyplot.plot_images(images, max_images=30, img_width=100)