# Purpose: Detect signature fields in documents

How the implementation will work:
1. Image or PDF file?
	- Image: move on to next step
	- PDF: extract PDF pages into multiple images
2. Send each image into ChatGPT - return whether or not each image has a signature field inside it or not. 
	- Yes: Move onto next step
	- No: Scan next image/stop program if no more images
3. Use OCR(optical character recognition) to analyze the text in the images - analyze in English and Thai
4. Get the position of the text where it says something like "siganture" or "name" or "first name" or "last name" or "surname" 
5. Create variations of the documents with each possible place for the signature marked with a bounding box
6. Send into GPT and have it check if the box marked is really the signature field
	- For the ones that are, save the coords of those boxes and then combine it all back into one image. 
8. Save that image and display it to the user 

# Step 1: Image or PDF?

- If PDF, extract iamges

# Step 2: Have GPT check if it has a singature field or not

- Since we're iterating through the images anyways might as well have GPT open them and check

In [3]:
import os
from pdf2image import convert_from_path
from dotenv import dotenv_values
from openai import AzureOpenAI
import base64
import requests

secrets = dotenv_values("../.env")

fileDirPath = "./files"

imagesDirPath = "./images"

signatureDocList = []


# First convert image given locally to a base 64 encoding in order to be able to pass it through to GPT
def encodeImage(pathLink:str):
	with open(pathLink, "rb") as image_file:
		return base64.b64encode(image_file.read()).decode('utf-8')

# Send in an image along with the 'instructional' prompt to GPT and get it's response
# Allowing instruction prompt to be changed with call of function so that we can reuse this function for the GPT signature field doube check
def imgSignatureFieldCheck(pathLink: str, urlOrNot: bool, secrets, prompt):
	headers = {
		"Content-Type": "application/json",
		"Authorization": f"Bearer {secrets["AZURE_OPENAI_API_KEY"]}"
	}
	# If the link provided is a url or not
	# Pass in both the 'instructional' prompt and the image to GPT
	# Need to make payload and header since we need to send it in as a request in order to be able to attach an image to it
	payload = None
	if urlOrNot:
		payload = {"model": "gpt-4o", "messages": prompt + [{"role": "user", "content": [{"type": "image_url", "image_url":{"url": pathLink}}]}]}
	else:
		encodedImage = encodeImage(pathLink)
		payload = {"model": "gpt-4o", "messages": prompt + [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encodedImage}"}}]}]}
	
	response = requests.post(secrets["AZURE_OPENAI_ENDPOINT"], headers=headers, json=payload)

	return response.content
	
# Instruction prompt for GPT for what to do
instructionPrompt = [{"role":"system", "content": "You are an image checker who will check whether or not a document image contains a signature field or not. The signature field can be in any language but will usually be Thai or Enligsh. You will return the result of that detection strictly as a boolean value. You output should ONLY be either 'true' or 'false' and shouldn't have anything else inside it. "}]
# Iterate through the directory files
for root, dirs, files in os.walk(fileDirPath):
	for filename in files:
		print(filename)
		# Check the type of file using the name
		filetype = filename.split(".")[-1]
		if filetype == "pdf":
			# Split the pdf into images
			convertedImages = convert_from_path(f"{fileDirPath}/{filename}")
			# Save the converted images
			for i in range(len(convertedImages)):
				newImgPath = f"{imagesDirPath}/{filename.split(".")[0]}_{i}.jpg"
				convertedImages[i].save(newImgPath, "JPEG")
				if not bool(imgSignatureFieldCheck(newImgPath, False, secrets, instructionPrompt)):
					os.remove(newImgPath)

		# If it's an image just move it
		elif filetype in ("png", "jpeg", "jpg"):
			# Check if it has a signature field
			sigOrNot = bool(imgSignatureFieldCheck(f"{fileDirPath}/{filename}", False, secrets, instructionPrompt))
			print(sigOrNot)
			if sigOrNot:
				# Move to the image directory
				os.rename(f"{fileDirPath}/{filename}", f"{imagesDirPath}/{filename}")



# Step 3 - OCR to analyze the text in the images and determine where the signature fields are
# Step 4 - Check which words match up to signature fields and save to .csv file

- Find the boxes with words that match up to the words commonly used in the signature fields
- Get a rectangular shape that encompasses all of the words that match with - set as designated signature field zone

In [4]:
import easyocr
import os
import csv

# Create the reader obj instance - supports both English and Thai
reader = easyocr.Reader(["en", "th"], gpu=True)

# Words that are used for signature fields; cr: found by gemini, some words were cut off and some were added in by me too
signatureWords = [
	"signature", "ลายเซ็น",
	"sign", "เซ็น",
	"signed", "ลงนาม",
	"date", "วันที่",
	"name", "ชื่อ",
	"title", "ตำแหน่ง",
	"printed name", "ชื่อพิมพ์",
	"authorized signature", "ลายเซ็นที่ได้รับอนุญาต",
	"signer", "ผู้ลงนาม",
	"witness", "พยาน",
	"initials", "อักษรย่อ",
	"signature line", "เส้นลายเซ็น",
	"signature block", "บล็อกลายเซ็น",
	"signature field", "ช่องลายเซ็น",
	"electronic signature", "ลายเซ็นอิเล็กทรอนิกส์",
	"digital signature", "ลายเซ็นดิจิทัล",
	"signature of applicant", "ลายเซ็นของผู้สมัคร",
	"signature of authorized person", "ลายเซ็นของผู้ที่ได้รับอนุญาต",
	"signature of representative", "ลายเซ็นของตัวแทน",
	"signature of witness", "ลายเซ็นของพยาน"
]

fileDirPath = "./files"

imagesDirPath = "./images"

signatureFiles = {}
filenames = []
# Read through each text in the images folder
# Save it to the .csv
with open("signature_files_coords.csv", "w+", encoding="UTF-8") as file:
	dictwriter = csv.DictWriter(file, fieldnames = ["filename", "results"])
	for root, dirs, files in os.walk(imagesDirPath):
		for filename in files:
			filenames.append(filename)
			results = reader.readtext(f"{imagesDirPath}/{filename}")
			# Add the coordinates to the signature file if the text is inside the signature words
			matchedText = {}
			for result in results:
				for word in signatureWords:
					# Check if any signature word is inside the words scanned from the page
					if (word in result[1]) and (result[2] > 0.7) and (len(word) < 20):
						# Add the coordinates into a collected dict of coords and words that match
						matchedText[result[1]] = result[0]
			dictwriter.writerow({"filename": filename, "results": matchedText})
		

print("Finished processing and saving the OCR results that match signature fields. ")




Finished processing and saving the OCR results that match signature fields. 


# Step 5. Create variations of the documents with each possible place for the signature marked with a bounding box
# Step 6. Send into GPT and have it check if the box marked is really the signature field
 
- Open the files/signature data first to load it for use

In [5]:
import csv

signatureFiles = {}
with open("signature_files_coords.csv", "r", encoding="UTF-8") as file:
	reader = csv.DictReader(file, fieldnames= ["filename", "results"])
	for row in reader:
		# Csv reader will return it as a string - conv the results back, to dict
		convBack = None
		exec(f"convBack = {row["results"]}", locals())
		signatureFiles[row["filename"]] = convBack

print(signatureFiles)

{'--1086x1536.jpg': {'(ลงลายมือชื่อ)': [[560, 1360], [678, 1360], [678, 1390], [560, 1390]], '(ลงลายมือชื่อ)_': [[194, 1242], [314, 1242], [314, 1274], [194, 1274]]}, '-.1-1086x1536.jpg': {'ชื่อห้างหุ้นส่วนหรือบริษัท.': [[139, 223], [318, 223], [318, 256], [139, 256]], 'วันที่จดทะเบียน': [[590, 268], [700, 268], [700, 294], [590, 294]], 'คนขึ้นไป เมื่อวันที่': [[294, 518], [420, 518], [420, 549], [294, 549]], 'ลงชื่อ': [[468, 1330], [512, 1330], [512, 1356], [468, 1356]], 'ลงชื่อ_': [[468, 1390], [512, 1390], [512, 1416], [468, 1416]], 'วันที่': [[468, 1450], [506, 1450], [506, 1476], [468, 1476]]}, '-.2-2-1086x1536.jpg': {'คำรับรองลายมือชื่อของพยาน': [[431, 61], [699, 61], [699, 99], [431, 99]], '(ลงลายมือชื่อ': [[509, 574], [610, 574], [610, 603], [509, 603]], '(ลงลายมือชื่อ)': [[278, 1315], [385, 1315], [385, 1351], [278, 1351]], '(ลงลายมือชื่อ).': [[556, 1448], [662, 1448], [662, 1476], [556, 1476]]}, '-.3-1-1086x1536.jpg': {'(ลงลายมือชื่อ)': [[244, 1277], [359, 1277], [359, 1313],

In [9]:
import os
import cv2
import shutil
from dotenv import dotenv_values

# Get secrets
secrets = dotenv_values("./.env")

# Make the directories based on each filename
checkDirPath = "./checker"

imagesDirPath = "./images"

# Draw on the image using opencv and saving it inside the checks folder
# Function to made the images with each result to check with gpt if it's a signature field
def makeImgChecks(checkDirPath: str, imageFilename: str, results):
	confimedSignatureCoords = []
	imageFoldername = "".join(imageFilename.split(".")[:-1])
	# Remove the files in the path if they alr exist
	if os.path.exists(f"{checkDirPath}/{imageFoldername}"):
		shutil.rmtree(f"{checkDirPath}/{imageFoldername}")
	# Make a folder to store all the images to check
	os.mkdir(f"{checkDirPath}/{imageFoldername}")
	for i in range(len(results)):
		image = cv2.imread(f"{imagesDirPath}/{imageFilename}")
		text = list(results.keys())[i]
		coord1 = results[text][0]
		coord2 = results[text][2]
		# Draw the border rectange at the coordinates given by the OCR text
		image = cv2.rectangle(image, (coord1[0] - 100, coord1[1]), (coord2[0] + 100, coord2[1] + 50), (0, 255, 0), 2)
		checkingPath = f"{checkDirPath}/{imageFoldername}/{i}_{imageFilename}"
		cv2.imwrite(checkingPath, image)
		
		# Send into GPT for confirmation
		prompt = [{"role":"system", "content": "You are an image checker who will confirm whether or not the blue rectangular box in the document is showing is a signature field or not. A field to enter your name normally or anything that does not require a signature is NOT a signature field.  The signature field can be in any language but will usually be Thai or Enligsh. You will return the result of that detection strictly as a boolean value which will either confirm that the green rectangualr box in the image is part of a signature field or not. You output should ONLY be either 'true' or 'false' and shouldn't have anything else inside it. "}]
		if bool(imgSignatureFieldCheck(checkingPath, False, secrets=secrets, prompt = prompt)):
			confimedSignatureCoords.append((coord1, coord2))
					 
	return confimedSignatureCoords
		
		

# Run the function to create boxes around the possible signature fields for checking and save it to device
confirmedFiles = {}
for file in signatureFiles:
	confirmedFiles[file] = makeImgChecks(checkDirPath, file, signatureFiles[file])

print(confirmedFiles)

{'--1086x1536.jpg': [([560, 1360], [678, 1390]), ([194, 1242], [314, 1274])], '-.1-1086x1536.jpg': [([139, 223], [318, 256]), ([590, 268], [700, 294]), ([294, 518], [420, 549]), ([468, 1330], [512, 1356]), ([468, 1390], [512, 1416]), ([468, 1450], [506, 1476])], '-.2-2-1086x1536.jpg': [([431, 61], [699, 99]), ([509, 574], [610, 603]), ([278, 1315], [385, 1351]), ([556, 1448], [662, 1476])], '-.3-1-1086x1536.jpg': [([244, 1277], [359, 1313]), ([574, 1412], [688, 1442])], '-.3-2-1086x1536.jpg': [([270, 1276], [376, 1304])], '-.5-1086x1536.jpg': [([52, 168], [278, 196]), ([206, 262], [274, 288]), ([772, 296], [818, 320])]}


# Step 8: Combine all the confirmed coordinates of image and compile them into one single image

In [11]:
import cv2

finalDirPath = "./final"

def combineFinalImage(destinationDirPath: str, originalImagePath: str, signatureCoords: list):
    image = cv2.imread(filename=originalImagePath)
    # Make the rectangles in the image
    for coord in signatureCoords: 
        image = cv2.rectangle(image, coord[0], coord[1], color=(255, 0, 0), thickness=5)
    cv2.imwrite(f"{destinationDirPath}/{originalImagePath.split("/")[-1]}", image)

for file in confirmedFiles:
    combineFinalImage(finalDirPath, f"./images/{file}", confirmedFiles[file])