In [1]:
import os
import time
from datetime import datetime
import json
import random
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import cv2
import tensorflow as tf
from sklearn.metrics import f1_score, fbeta_score, precision_score, recall_score,confusion_matrix,ConfusionMatrixDisplay
from tensorflow.keras import layers
from tensorflow.keras.utils import load_img,img_to_array
from tensorflow.keras.applications import MobileNetV3Large
from tensorflow.keras.applications.mobilenet_v3 import preprocess_input, decode_predictions
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense,GlobalAveragePooling2D,Input,Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.layers import RandomCrop, RandomFlip, RandomRotation, RandomContrast
from tensorflow.keras.callbacks import EarlyStopping,ModelCheckpoint
from tensorflow.keras.regularizers import l2
from tensorflow.keras.mixed_precision import experimental as mixed_precision
from tensorflow.keras import backend as K
import torch
from ultralytics import YOLO
import tkinter as tk
from tkinter import simpledialog
import warnings
warnings.filterwarnings("ignore", category=FutureWarning) 

# Clear last session to feee up space before new
K.clear_session()

# if GPU is available this code will state 1
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

# image height and width for input into Siamese Network with MobileNetV3 as a base 
IMG_HEIGHT = 300
IMG_WIDTH = 300


Num GPUs Available:  1


In [2]:
# load Siamese network model file
def load_model(model_name):
    model = tf.keras.models.load_model(model_name)
    return model

# Preprocessing function to resize and normalize image
def preprocess_image(image):
    # resize image to match height and width specified earlier 
    image_resized = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
    
    # Normalize values of pixels to be around 0.0-1.0
    image_resized = image_resized / 255.0  
    
    return image_resized

In [3]:
# siamese network prediction function  
def predict(id_images, img_to_evaluate, model, threshold = 0.65):
    '''
    id_images_batch - batch of saved id images, should be numpy array of shape (x, 300, 300, 3) where x is number of saved images 
    or path for image which is saved in the memory and should be compared to, in this case it is string path
    img_to_evaluate - image which will be compared to img_base which is saved in the memory
    model - AI model file which contains trained weights for siamese network
    threshold - optional value which we can state for siamese networks confidence score from 0.50 till 0.99, which means 
    that results will be shown only if confidence score for similarity is more than threshold
    '''
    
    # preprocess images
    if type(id_images) == str:
        img_base = np.array(preprocess_image(id_images))
        img_base = np.expand_dims(img_base, axis=0)  # Add batch dimension, shape becomes (1, 300, 300, 3)
    elif isinstance(id_images, np.ndarray):
        img_base = id_images

    
    img_to_evaluate = np.array(preprocess_image(img_to_evaluate))

    # expand dimensions of images to add batch dimension 
    
    img_to_evaluate = np.expand_dims(img_to_evaluate, axis=0)  # Add batch dimension, shape becomes (1, 300, 300, 3)
    
    img_to_evaluate = np.tile(img_to_evaluate, (len(img_base), 1, 1, 1))
    
    # predict the similarity between the two images,
    # returns tensor with two values like this [0.37, 0.63] which states how similar are images
    # 0.0 no similarity , 1.0 full similarity
    preds = model.predict((img_base,img_to_evaluate))
    predicted_idx = np.argmax(preds[:, 1])

    if preds[predicted_idx][1] > threshold:
        return predicted_idx, preds[predicted_idx][1]
    else:
        return 999,999


In [4]:
# load the siamese network model
model_name = f'MobileNetV3_1024_siamese.h5'
model = load_model(model_name)
# load the YOLO model 
yolo_model = torch.hub.load('ultralytics/yolov5',"custom", path=r'C:\Users\Saba\Desktop\Python\vigo\AIMV\yolov5\runs\train\exp6\weights/best.pt',force_reload=True)

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to C:\Users\Saba/.cache\torch\hub\master.zip
YOLOv5  2024-11-23 Python-3.9.0 torch-2.5.0+cu118 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 4096MiB)

Fusing layers... 
Model summary: 157 layers, 7012822 parameters, 0 gradients, 15.8 GFLOPs
Adding AutoShape... 


In [5]:
# function returns YOLO model object detection results 
def yolo(yolo_model,frame):
    results = yolo_model(frame)
    return results

# unpack the coordinates of bounding boxes and confidence score for ID class
def get_results_of_yolo(frame_predictions):
    x1 = int(frame_predictions.xyxy[0][0][0])
    y1 = int(frame_predictions.xyxy[0][0][1])
    x2 = int(frame_predictions.xyxy[0][0][2])
    y2 = int(frame_predictions.xyxy[0][0][3])
    
    confidence = frame_predictions.xyxy[0][0][4]
    
    return x1,y1,x2,y2,confidence

# crop the id card from the frame for it to be saved or passed to siamese network for evaluating
def crop_id_card(frame_predictions, frame):
    x1,y1,x2,y2,confidence = get_results_of_yolo(frame_predictions)
    # x1 (pixels)  y1 (pixels)  x2 (pixels)  y2 (pixels)   confidence   class
    cropped_image = frame[y1:y2, x1:x2]
    
    return cropped_image

In [6]:
# file path for id_examples
id_examples_filepath = './id_examples/'
def retrieve_saved_ids():
    saved_ids_labels = []
    saved_ids_images = []
    for id_img in os.listdir(id_examples_filepath):
        id_image_uncropped = cv2.imread(id_examples_filepath+id_img)
        bounding_box_prediction = yolo(yolo_model,id_image_uncropped)
        
        # make siamese network work only in the case of finding id card shown
        if bounding_box_prediction.xyxy[0].shape != (0,6):
            # get coordinates of bounding box
            x1,y1,x2,y2,confidence_score = get_results_of_yolo(bounding_box_prediction)
            id_image_cropped = crop_id_card(bounding_box_prediction, id_image_uncropped)
            saved_ids_images.append(id_image_cropped)
        else: 
            saved_ids_images.append(id_image_uncropped)
            print("uncropped version added")
        saved_ids_labels.append(id_img)

    id_img_batch = np.array([preprocess_image(image_array) for image_array in saved_ids_images])
    return id_img_batch, saved_ids_labels

In [7]:
def add_new_id_to_database(frame, new_id_name):
    # Create a directory to store new ID images if it doesn’t exist
    os.makedirs("./id_examples/", exist_ok=True)

    # Create a unique file name with ID type and current timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    file_path = f"./id_examples/{new_id_name}_{timestamp}.jpg"

    # Save the image to the specified path
    cv2.imwrite(file_path, frame)

In [8]:
# def is_still_frame(current_frame, previous_frame, threshold=50000):
#     # Calculate the absolute difference between frames
#     diff = cv2.absdiff(previous_frame, current_frame)
#     # Convert to grayscale and compute the sum of differences
#     gray_diff = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
#     non_zero_count = np.sum(gray_diff)
#     return non_zero_count < threshold

# def is_sharp_frame(frame, threshold=50):
#     # Calculate the Laplacian to assess sharpness (higher variance = sharper)
#     gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
#     variance = cv2.Laplacian(gray_frame, cv2.CV_64F).var()
#     return variance > threshold

def ask_user_for_id_type():
    root = tk.Tk()
    root.withdraw()
    new_id_type = simpledialog.askstring("Input", "Enter new ID type name:")
    root.destroy()
    return new_id_type

def check_for_new_card(confidence_score,frame):
    if confidence_score > 0.90:
        if frame_counter < 20:
            frame_counter += 1
            
        else:
            new_id_name = ask_user_for_id_type()
            add_new_id_to_database(frame, new_id_name)
            id_images_batch, id_labels = retrieve_saved_ids()
            frame_counter = 0
        return "found_id"
    return "no_id"
    

In [9]:
# Open video capture (0 is the default camera)
cap = cv2.VideoCapture(0)
id_images_batch, id_labels = retrieve_saved_ids()
global frame_counter = 0

while cap.isOpened():
    # Capture frame-by-frame
    ret, frame = cap.read()
    if not ret:
        print("Failed to grab frame.")
        break

    # Predict bounding box coordinates
    bounding_box_prediction = yolo(yolo_model,frame)

    # make siamese network work only in the case of finding id card shown
    if bounding_box_prediction.xyxy[0].shape != (0,6):
        # get coordinates of bounding box
        x1,y1,x2,y2,confidence_score = get_results_of_yolo(bounding_box_prediction)
        # Show the frame with prediction

        # Crop the ID card
        cropped_id_card = crop_id_card(bounding_box_prediction, frame)
        
        # Check if a new ID card is detected
        if len(id_images_batch) == 0:
            if confidence_score > 0.90:
                if frame_counter < 20:
                    frame_counter += 1
                    continue
                else:
                    new_id_name = ask_user_for_id_type()
                    add_new_id_to_database(frame, new_id_name)
                    id_images_batch, id_labels = retrieve_saved_ids()
                    frame_counter = 0
                    
        else:
            # predict with siamese network
            prediction_idx, confidence_score_siamese = predict(id_images_batch, cropped_id_card, model, threshold=0.95)

            if prediction_idx == 999:
                prediction_name = "not found"
                if confidence_score > 0.90:
                    if frame_counter < 20:
                        frame_counter += 1
                        continue
                    else:
                        new_id_name = ask_user_for_id_type()
                        add_new_id_to_database(frame, new_id_name)
                        id_images_batch, id_labels = retrieve_saved_ids()
                        frame_counter = 0
            else:
                prediction_name = id_labels[prediction_idx]

            cv2.putText(frame, f'id type: {prediction_name},{confidence_score_siamese:.2f}', (10, 30),
                          cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        if confidence_score > 0.45:
            # apply bounding box to the image
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            
            # apply confidence score for the id object detection
            text = f'{confidence_score:.2f}'
            text_position = (x1, y1 - 10)  # Slightly above the bounding box
            
            # Put text on the frame
            cv2.putText(frame, text, text_position, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    
    cv2.imshow("Video", frame)
    
    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the capture and close OpenCV windows
cap.release()
cv2.destroyAllWindows()

SyntaxError: invalid syntax (3959849619.py, line 4)

In [10]:
# Create a button that prints the input field's text when clicked
def on_button_click(id_image):
    new_id_name = input_entry.get()  # Get text from entry field
    add_new_id_to_database(id_image, new_id_name)
    id_images_batch, id_labels = retrieve_saved_ids()
    input_entry.delete(0, tk.END)  # Clear the entry field after clicking


def create_window_submit_window():
    window_submit_photo = tk.TK()
    window_submit_photo.title("ID submission")
    window.geometry("1200x800")
    
    video_label_submit = tk.Label(window_submit_photo)
    video_label_submit.pack()
    
    input_entry = tk.Entry(window, width=30)
    input_entry.pack()

    button = tk.Button(window, text="Submit", command=on_button_click)
    button.pack()
    

In [11]:
import cv2
import tkinter as tk
from PIL import Image, ImageTk

# Assuming you have defined these functions: retrieve_saved_ids, yolo, get_results_of_yolo,
# crop_id_card, ask_user_for_id_type, add_new_id_to_database, and predict.

# Open video capture (0 is the default camera)
cap = cv2.VideoCapture(0)
id_images_batch, id_labels = retrieve_saved_ids()
frame_counter = 0  # Use local variable instead of global

# Initialize the Tkinter window
window = tk.Tk()
window.title("ID Detection")
window.geometry("1200x800")





# Create a label to display the video feed
video_label = tk.Label(window)
video_label.pack()

# Create an entry field





# Function to update frame in Tkinter window
def update_frame():
    global frame_counter, id_images_batch, id_labels

    # Capture frame-by-frame
    ret, frame = cap.read()
    if not ret:
        print("Failed to grab frame.")
        window.after(10, update_frame)  # Schedule next frame update
        return

    # Predict bounding box coordinates
    bounding_box_prediction = yolo(yolo_model, frame)

    # Make siamese network work only if ID card is found
    if bounding_box_prediction.xyxy[0].shape != (0, 6):
        # Get coordinates of bounding box
        x1, y1, x2, y2, confidence_score = get_results_of_yolo(bounding_box_prediction)

        # Crop the ID card
        cropped_id_card = crop_id_card(bounding_box_prediction, frame)

        # Check if a new ID card is detected
        if len(id_images_batch) == 0:
            if confidence_score > 0.90:
                if frame_counter < 20:
                    frame_counter += 1
                else:
                    new_id_name = ask_user_for_id_type()
                    add_new_id_to_database(frame, new_id_name)
                    id_images_batch, id_labels = retrieve_saved_ids()
                    frame_counter = 0
        else:
            # Predict with siamese network
            prediction_idx, confidence_score_siamese = predict(id_images_batch, cropped_id_card, model, threshold=0.95)

            if prediction_idx == 999:
                prediction_name = "not found"
                if confidence_score > 0.90:
                    if frame_counter < 20:
                        frame_counter += 1
                    else:
                        new_id_name = ask_user_for_id_type()
                        add_new_id_to_database(frame, new_id_name)
                        id_images_batch, id_labels = retrieve_saved_ids()
                        frame_counter = 0
            else:
                prediction_name = id_labels[prediction_idx]

            cv2.putText(frame, f'id type: {prediction_name}, {confidence_score_siamese:.2f}', (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        
        if confidence_score > 0.45:
            # Draw bounding box on the frame
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            text = f'{confidence_score:.2f}'
            text_position = (x1, y1 - 10)
            cv2.putText(frame, text, text_position, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Convert frame to RGB and update the Tkinter label
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(frame_rgb)
    imgtk = ImageTk.PhotoImage(image=img)
    video_label.imgtk = imgtk
    video_label.configure(image=imgtk)

    # Schedule the next frame update
    video_label.after(10, update_frame)

# Start updating frames
update_frame()

# Define a function to handle window close
def on_closing():
    cap.release()  # Release the camera
    window.destroy()  # Close the Tkinter window

# Bind the window close event
window.protocol("WM_DELETE_WINDOW", on_closing)

# Start the Tkinter main loop
window.mainloop()
