# Game Text Extractor (App)

In [3]:
# env: opencv39
from modules.crop_image import crop_image
from modules.load_images_from_folder import load_images_from_folder, get_image_paths
import easyocr
import pytesseract
from PIL import Image
import PIL
import sys
import cv2
import numpy as np
import os

# Location of the Tesseract executable used by pytesseract.
# Set the TESSERACT_CMD environment variable to override it on another machine;
# the default below is the original author's local Windows install.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    "TESSERACT_CMD",
    r'C:/Users/kaan-/AppData/Local/Programs/Tesseract-OCR/tesseract.exe',
)

# Record the library versions this notebook was run with (known-good values in the comments).
print("pytesseract: " + pytesseract.__version__)  # 0.3.10
print("OpenCV2: " + cv2.__version__)  # 4.9.0
print("easyocr: " + easyocr.__version__) # 1.6.2
# sys.version is "<version> (<build info>) ..."; take the first token rather than a
# fixed-width slice so versions of any length print without stray characters.
print("python: " + sys.version.split()[0])  # 3.9.18
print("NumPy: " + np.__version__)  # 1.26.1
print("PIL: " + PIL.__version__)  # 9.5.0

pytesseract: 0.3.10
OpenCV2: 4.9.0
easyocr: 1.6.2
python: 3.9.18 
NumPy: 1.26.1
PIL: 9.5.0


# Extract Matchmaking Frames from Video

In [6]:
# Configuration for extracting matchmaking frames from a video.
# Trailing comments record the values used for the reference run.

# Video file to scan, looked up inside VIDEOS_PATH.
MP4_FILE_NAME = "stream-200-trimmed.mp4"  # stream-200-trimmed.mp4
# Template image that is searched for in the sampled video frames.
TEMPLATE_IMG = "./data/template_matching/templates/season_17_small_3.png"  # season_17_small_3
# A frame is saved only when its best template-match score exceeds this value.
MINIMUM_ACCURACY = 0.95  # 0.95
# Interval, in frames, between template-match attempts.
FRAME_SKIP = 205  # 205

VIDEOS_PATH = "./data/template_matching/videos/"
MP4_PATH = VIDEOS_PATH + MP4_FILE_NAME
# Matching frames are written to a folder named after the video. The extension is
# removed with os.path.splitext so it works for extensions of any length, not just 4 chars.
SAVE_LOC = "./data/template_matching/videos_images/" + \
    os.path.splitext(MP4_FILE_NAME)[0] + "/"

In [7]:
def seconds_to_hms(seconds):
    """Split a duration in seconds into whole hours, minutes and seconds.

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        A tuple ``(hours, minutes, seconds)`` of ints; each component is
        truncated with ``int()`` after the split.
    """
    whole_hours, leftover = divmod(seconds, 3600)
    whole_minutes, leftover_seconds = divmod(leftover, 60)
    return tuple(int(part) for part in (whole_hours, whole_minutes, leftover_seconds))

# try to create save folder
# exist_ok=True makes this a single call with no gap between "check" and "create",
# and leaves an already-existing folder untouched.
try:
    os.makedirs(SAVE_LOC, exist_ok=True)
except OSError as err:
    # Keep going as before, but show why the folder could not be created.
    print(f'Error: Creating directory of data ({err})')

# Scan the video, template-match every FRAME_SKIP-th frame, and save each frame
# whose best match score exceeds MINIMUM_ACCURACY into SAVE_LOC.

# variables

cam = cv2.VideoCapture(MP4_PATH)  # Load Video
if not cam.isOpened():
    raise FileNotFoundError(f"Could not open video: {MP4_PATH}")

# Load the template once, in grayscale; it does not change between frames.
# cv2.imread returns None instead of raising when the file cannot be read.
template = cv2.imread(TEMPLATE_IMG, cv2.IMREAD_GRAYSCALE)
if template is None:
    cam.release()
    raise FileNotFoundError(f"Could not read template image: {TEMPLATE_IMG}")

total_frames = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

# Frame rate used to turn a frame index into a timestamp. Fall back to 30 fps
# (the value previously hard-coded) when the container does not report one.
fps = cam.get(cv2.CAP_PROP_FPS) or 30.0

currentframe = 0

try:
    while True:

        # read next frame; ret is False once the video is exhausted
        ret, frame = cam.read()
        if not ret:
            print('breaking')
            break

        # every FRAME_SKIP frames try to match template
        if currentframe % FRAME_SKIP == 0:
            # greyscale the img (creates a new array, the colour frame is kept for saving)
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # match the template
            result = cv2.matchTemplate(gray_frame, template, cv2.TM_CCOEFF_NORMED)

            # break down the results; only the best score is needed
            _, max_val, _, _ = cv2.minMaxLoc(result)

            # print the frame
            print(f"frame {currentframe}/{total_frames}: {max_val}")

            # if good match save img
            if max_val > MINIMUM_ACCURACY:
                # name the file after the frame's position in the video
                hours, minutes, seconds = seconds_to_hms(currentframe / fps)

                file_name = f"{SAVE_LOC}time_{hours}_{minutes}_{seconds}.jpg"
                # imwrite reports failure through its return value
                if not cv2.imwrite(file_name, frame):
                    print(f"Error: could not write {file_name}")

        currentframe += 1
finally:
    # Release the capture even if matching or saving raised
    cam.release()

frame 0/5721: 0.452786922454834
frame 205/5721: 0.4865720272064209
frame 410/5721: 0.43140077590942383
frame 615/5721: 0.5287557244300842
frame 820/5721: 0.995058536529541
frame 1025/5721: 0.4053811728954315
frame 1230/5721: 0.3602357804775238
frame 1435/5721: 0.44044771790504456
frame 1640/5721: 0.4388488531112671
frame 1845/5721: 0.32902321219444275
frame 2050/5721: 0.4384862780570984
frame 2255/5721: 0.4335598349571228
frame 2460/5721: 0.3417956233024597
frame 2665/5721: 0.41366785764694214
frame 2870/5721: 0.40898600220680237
frame 3075/5721: 0.4402392506599426
frame 3280/5721: 0.4634716510772705
frame 3485/5721: 0.256432443857193
frame 3690/5721: 0.4412793517112732
frame 3895/5721: 0.4428298771381378
frame 4100/5721: 0.4414302110671997
frame 4305/5721: 0.38717809319496155
frame 4510/5721: 0.20553255081176758
frame 4715/5721: 0.5047950148582458
frame 4920/5721: 0.5718196630477905
frame 5125/5721: 0.511805534362793
frame 5330/5721: 0.9950686693191528
frame 5535/5721: 0.4967774748802

# Apply OCR to the Extracted Images

In [8]:
# Grayscale cut-off for binarising the crop: values below it become black (0), the rest white (255).
THRESHOLD = 69
# Folder holding the frames saved for this video. The extension is removed with
# os.path.splitext so it works for extensions of any length, not just 4 chars.
DATA_PATH = f"./data/template_matching/videos_images/{os.path.splitext(MP4_FILE_NAME)[0]}/"
# Tesseract model directory and language code, referenced only by the
# commented-out pytesseract call in the OCR loop.
CUSTOM_TESSDATA_DIR = r'C:\Users\kaan-\AppData\Local\Programs\Tesseract-OCR\tessdata'
CUSTOM_LANGUAGE_CODE = "train"
# Folder handed to crop_image as its save location.
CROPPED_IMAGES_SAVE_LOC = "./data/template_matching/videos_images_cropped/"
# Crop box passed to crop_image; presumably pixel coordinates of the text region — TODO confirm.
#LEFT, TOP, RIGHT, BOTTOM = 1207, 320, 1440, 351 # screen recording
LEFT, TOP, RIGHT, BOTTOM = 800, 217, 967, 234 # downloaded youtube video
IMAGES = load_images_from_folder(DATA_PATH)
IMAGE_PATHS = get_image_paths(DATA_PATH)

# Crop each saved frame to the text region and run OCR on the crop.

# Build the EasyOCR reader once, outside the loop: constructing it loads the
# models, so creating it per image repeats that setup for every frame.
reader = easyocr.Reader(['en'])

# crop image
for path in IMAGE_PATHS:
    # crop img
    print(path)
    cropped_img = crop_image(path, CROPPED_IMAGES_SAVE_LOC, LEFT, TOP, RIGHT, BOTTOM)

    # grayscale img, then binarise it at THRESHOLD
    # (only the commented-out pytesseract path below consumes this image)
    grayscaled_test_image = cropped_img.convert('L')
    black_and_white_image = grayscaled_test_image.point(lambda x: 0 if x < THRESHOLD else 255, '1')

    # pytesseract (ocr)
    # old_result = pytesseract.image_to_string(black_and_white_image)
    # print("- pytesseract (not trained) -")
    # print(old_result)
    # custom_text = pytesseract.image_to_string(black_and_white_image, config=f'--tessdata-dir "{CUSTOM_TESSDATA_DIR}" -l {CUSTOM_LANGUAGE_CODE}')
    # print("- pytesseract (trained model=7) -")
    # print(custom_text)

    # easyocr
    # NOTE(review): assumes crop_image() writes the crop to "cropped_temp.jpg"
    # inside CROPPED_IMAGES_SAVE_LOC — confirm against modules.crop_image.
    results = reader.readtext(CROPPED_IMAGES_SAVE_LOC + "cropped_temp.jpg")
    print("- easyocr (untrained) -")
    # readtext returns one (bounding box, text, confidence) entry per detection
    # and an empty list when nothing is found; print the first detection's text.
    if results:
        print(results[0][1])
    else:
        print("(no text detected)")

CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.


./data/template_matching/videos_images/stream-200-trimmed/time_0_0_27.jpg


CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.


- easyocr (untrained) -
Ollicial_Jokerz
./data/template_matching/videos_images/stream-200-trimmed/time_0_2_57.jpg
- easyocr (untrained) -
ALoricn
