In [None]:
import json
import os
import re
import uuid
from difflib import SequenceMatcher

import certifi
import cv2
import easyocr
from PIL import Image


This next code segment extracts the individual frames from an input video and saves each one as a JPEG image.

In [None]:
# Extract every frame of the input video and write each one to
# ./og_east_images/frame<N>.jpg, where N counts up from 0 in read order.
video_path = "/Users/soorhansalia/Lab/videos/og_east.mp4"
frames_dir = './og_east_images'

# exist_ok=True replaces the exists()/makedirs() pair. A real failure (e.g. a
# permissions problem) now stops the cell instead of being printed and then
# followed by a loop in which every write fails.
os.makedirs(frames_dir, exist_ok=True)

cam = cv2.VideoCapture(video_path)
if not cam.isOpened():
    # Without this check a wrong path simply produces zero frames and no error.
    cam.release()
    raise IOError(f"Could not open video: {video_path}")

currentframe = 0
try:
    while True:
        ret, frame = cam.read()
        if not ret:
            # read() reports failure once no further frame can be decoded.
            break
        name = f"{frames_dir}/frame{currentframe}.jpg"
        print('Creating...' + name)
        # imwrite signals failure through its return value, so check it.
        if not cv2.imwrite(name, frame):
            raise IOError(f"Could not write frame to {name}")
        currentframe += 1
finally:
    # Release the capture even if a write fails part-way through.
    # No GUI window is opened in this cell, so there is nothing to destroy.
    cam.release()

This next code segment resizes each image to a more manageable size and rotates it 90 degrees counterclockwise, which makes it much easier for the OCR model to read the text on the stakes.

In [None]:
def rotate_and_resize_images(directory, size=(1300, 1300)):
    """Rotate and resize every image in ``directory``, overwriting each file.

    Every directory entry is announced with a "Processing file" line. Entries
    whose name ends in a known image extension (case-insensitive) are opened,
    rotated by 90 with ``expand=True``, resized to ``size`` with the LANCZOS
    filter, and saved back to the same path.

    Args:
        directory: Folder whose entries are scanned (not recursive).
        size: ``(width, height)`` passed to ``Image.resize``.

    Returns:
        None. Failures on individual images are printed and skipped.

    Note:
        The files are modified in place, so calling this twice on the same
        folder rotates the images a second time.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')

    for entry in os.listdir(directory):
        target = os.path.join(directory, entry)
        print(f"Processing file: {target}")

        # Anything that is not an image by extension is only announced.
        if not entry.lower().endswith(image_suffixes):
            continue

        try:
            with Image.open(target) as source:
                source.rotate(90, expand=True).resize(size, Image.LANCZOS).save(target)
                print(f"Saved image: {target}")
        except Exception as err:
            # Best effort: report the problem and move on to the next file.
            print(f"Error processing image {target}: {err}")

# Run the in-place rotate/resize pass over the stake image folder.
# NOTE(review): this folder is under /Users/..., while the rename and OCR
# cells point at '/blue/xuwang1/soorhansalia/StakeOCR/Stake' - confirm which
# location is intended.
if __name__ == "__main__":
    image_directory = '/Users/soorhansalia/Lab/Stake'
    rotate_and_resize_images(directory=image_directory)

This next code segment renames all the image files to the pattern "frame[number].jpg", which makes it easier to compare frames in the future.

In [None]:
def _natural_sort_key(filename):
    """Sort key that orders embedded numbers numerically (frame2 before frame10)."""
    # re.split with a capturing group alternates text, digits, text, ..., so
    # odd positions are always digit runs and equal positions compare same-typed.
    chunks = re.split(r'(\d+)', filename.lower())
    numeric_aware = [int(chunk) if index % 2 else chunk for index, chunk in enumerate(chunks)]
    # The raw name breaks ties between names that differ only by letter case.
    return (numeric_aware, filename)


def rename_images(folder_path):
    """Rename the images in ``folder_path`` to frame1.jpg, frame2.jpg, ...

    Entries whose name ends in a known image extension (case-insensitive) are
    numbered from 1 in natural filename order, so an existing sequence such as
    frame0, frame1, ..., frame10 keeps its relative order.

    The rename is done in two passes through unique temporary names. Renaming
    directly onto ``frame{i + 1}.jpg`` would replace a not-yet-processed file
    that already has that name (``os.rename`` overwrites silently on POSIX),
    which loses frames whenever the folder already uses this naming scheme.

    Args:
        folder_path: Folder containing the images (not recursive).

    Returns:
        None. One "Renamed <old> to <new>" line is printed per file.

    Raises:
        OSError: If listing the folder or a rename fails. Files already moved
            to a temporary name are moved back before the error propagates.

    Note:
        Every file receives a ``.jpg`` extension whatever its original
        extension was; the image data itself is not converted.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff')
    image_files = sorted(
        (name for name in os.listdir(folder_path) if name.lower().endswith(image_extensions)),
        key=_natural_sort_key,
    )

    # Pass 1: move every image out of the way so no target name is occupied.
    token = uuid.uuid4().hex
    staged = []
    try:
        for index, filename in enumerate(image_files):
            old_file_path = os.path.join(folder_path, filename)
            temp_file_path = os.path.join(folder_path, f".rename_tmp_{token}_{index}")
            os.rename(old_file_path, temp_file_path)
            staged.append((old_file_path, temp_file_path))
    except OSError:
        # Undo the partial staging so the folder is left as it was found.
        for old_file_path, temp_file_path in staged:
            os.rename(temp_file_path, old_file_path)
        raise

    # Pass 2: assign the final sequential names.
    for index, (old_file_path, temp_file_path) in enumerate(staged):
        new_file_path = os.path.join(folder_path, f"frame{index + 1}.jpg")
        os.rename(temp_file_path, new_file_path)
        print(f"Renamed {old_file_path} to {new_file_path}")

# Renumber the images in the stake folder so they are named frame<N>.jpg.
folder_path = '/blue/xuwang1/soorhansalia/StakeOCR/Stake'
rename_images(folder_path=folder_path)

This next code segment runs the OCR model (EasyOCR) on every frame image and saves the text read from each frame to a JSON file.

In [None]:
# Run EasyOCR over every .jpg in the stake folder and store, per file name,
# the list of recognised text strings in a JSON file.
folder_path = '/blue/xuwang1/soorhansalia/StakeOCR/Stake'
output_json_path = '/blue/xuwang1/soorhansalia/StakeOCR/outputs/outputs.json'

# Create the output folder before the slow OCR pass: otherwise a missing
# folder is only discovered at the final write, after all the work is done.
output_dir = os.path.dirname(output_json_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

reader = easyocr.Reader(['en'])

ocr_results = {}

# sorted() makes the key order of the JSON reproducible (os.listdir order is
# arbitrary). This is plain string order, so frame10.jpg precedes frame2.jpg.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".jpg"):
        continue

    file_path = os.path.join(folder_path, filename)
    if not os.path.isfile(file_path):
        continue

    text_results = reader.readtext(file_path)

    # Keep element 1 of each result as the recognised text.
    # NOTE(review): presumably results are (bbox, text, confidence) - confirm
    # against the installed EasyOCR version.
    ocr_results[filename] = [str(text[1]) for text in text_results]

with open(output_json_path, 'w') as json_file:
    json.dump(ocr_results, json_file, indent=4)

print(f"OCR results saved to {output_json_path}")

This next code segment goes through the OCR results JSON and, for each target stake value, finds the reading that is most similar to it and associates the frame containing that reading with the target stake value.

In [None]:
# Load the per-frame OCR readings; the matching code below iterates this as
# {frame name: [reading, ...]}.
# NOTE(review): no visible cell writes 'sorted_output.json' (the OCR cell
# writes outputs.json) - presumably it is a sorted copy; confirm its origin.
with open('sorted_output.json', 'r') as sorted_output_file:
    json_data = json.loads(sorted_output_file.read())

# Target stake labels, in the order they are written to the output file.
STAKE_IDS = (
    "22.57-24", "22.43-64", "22.37-40", "22.51-96", "Elyana", "21.31-4",
    "22.54-32", "22.40R-86", "21.5-24", "21.1-14", "17.76-92", "21.66-10",
    "20.97-30", "22.36-50", "22.74-148", "22.56-144", "21.22-33", "17.35-99",
    "22.36-7", "22.54-58", "21.Rad.SC-101", "22.56-19", "22.3-177", "22.84-100",
    "22.44-145", "21.14R-197", "22.49-5", "21.3-69", "22.15-111", "18.57-36",
    "22.23-176", "22.91-227", "22.10-59", "22.20-92", "22.94-27", "Beauty",
    "20.22-124", "19.58-161", "22.3-189", "22.23-170", "22.19-214", "WG9E",
    "22.87-19", "20.39-37", "19.71-53", "22.58-81", "21.1-95", "22.73-13",
    "21.1-63", "22.58-4", "22.32-93", "22.41-69", "22.48-48", "22.91-152",
    "21.5-188", "Festival", "21.22-165", "22.90-147", "22.6-165", "22.82-10",
    "22.35-156", "22.52R-64", "22.90-89", "22.42-131", "22.41-128", "22.56-47",
    "22.54-22", "22.6-22", "20.43-156", "22.90-64", "22.30-168", "21.18-64",
    "18.20-3", "22.4R-158", "Camarosa", "20.71-47", "22.11-13", "20.43-117",
    "22.91-90", "22.56-108", "21.25-157", "20.9-238", "20.37-92", "19.9-12",
    "20.62-27", "22.92-211", "22.45-71", "22.51-124", "22.92-76", "WG8B",
    "22.92-8", "21.14R-190", "22.58-86", "19.10-55", "22.92-75", "Felicity",
    "22.44-105", "22.22-133", "22.40R-91", "22.25-69", "21.16-280", "22.11-53",
    "22.89-4", "Florida127Low", "22.91-39", "19.57-286", "12.93-4",
)

# Every stake starts with its own empty list as a placeholder value.
stakes = {stake_id: [] for stake_id in STAKE_IDS}

def find_best_match(word, possibilities):
    """Return the entry of ``possibilities`` most similar to ``word``.

    Similarity is ``SequenceMatcher(None, word, candidate).ratio()``. When
    several candidates share the top ratio, the earliest one wins. ``None`` is
    returned when ``possibilities`` is empty or no candidate scores above 0.

    Note: this helper is not called anywhere else in the visible notebook.
    """
    scored = [
        (SequenceMatcher(None, word, candidate).ratio(), candidate)
        for candidate in possibilities
    ]
    if not scored:
        return None

    # max() keeps the first of several equal maxima, matching a strict ">" scan.
    top_ratio, top_candidate = max(scored, key=lambda pair: pair[0])
    return top_candidate if top_ratio > 0 else None

# For each target stake label, find the frame containing the OCR reading that
# is most similar to the label and store that frame's name under the label.
# A stake for which no reading scores above 0 keeps its original value.
for stake_key in stakes.keys():
    best_frame = None
    best_ratio = 0

    # SequenceMatcher caches its analysis of the second sequence, so build one
    # matcher per stake label and swap only the first sequence per reading.
    # The (reading, label) argument order is the same as constructing
    # SequenceMatcher(None, value, stake_key) each time, so ratios are unchanged.
    matcher = SequenceMatcher(None)
    matcher.set_seq2(stake_key)

    for frame, values in json_data.items():
        for value in values:
            matcher.set_seq1(value)
            ratio = matcher.ratio()
            # Strict ">" keeps the earliest frame when several readings tie.
            if ratio > best_ratio:
                best_ratio = ratio
                best_frame = frame

    if best_frame is not None:
        stakes[stake_key] = best_frame

with open('updated_stakes2.json', 'w') as file:
    json.dump(stakes, file, indent=2)

print("Updated stakes dictionary saved to 'updated_stakes2.json'.")
