In [105]:
import numpy as np
import cv2
from tkinter import Tk, filedialog
from pathlib import Path
import os
from os import listdir
from os.path import isfile, join
from PIL import Image, ImageEnhance, ImageOps, ImageFilter
import PIL.Image
import pytesseract
pytesseract.pytesseract.tesseract_cmd = Path(r'C:\Program Files\Tesseract-OCR\tesseract.exe') #Set Pytesseract path
#import keras_ocr

import matplotlib.pyplot as plt

# Image Processing References

https://pythongeeks.org/image-processing-in-python/

https://www.section.io/engineering-education/image-adjustment-to-higher-or-lower-resolution-using-python/

In [2]:
def initiate_tk_dialog():
    """Create the Tk root window used to host a file/directory dialog.

    The root is withdrawn (hidden) and flagged '-topmost' before being
    returned. The caller owns the returned root and must destroy it.
    """
    dialog_root = Tk()
    # Hide the small, empty root window that Tk would otherwise show.
    dialog_root.withdraw()
    # Keep windows opened from this root above all other windows.
    dialog_root.attributes('-topmost', True)
    return dialog_root

def get_data_file(title: str, as_path = False):
    """Ask the user to pick a file through a Tk "open file" dialog.

    title: text shown in the dialog title bar.
    as_path: if truthy, return the selection as a pathlib.Path; otherwise as str.

    Returns the selected file path.
    NOTE(review): tkinter documents an empty string for a cancelled dialog;
    with as_path=True that becomes Path(''), i.e. Path('.') - confirm callers cope.
    """
    root = initiate_tk_dialog()
    try:
        selection = filedialog.askopenfilename(title=title)
    finally:
        # Always tear down the hidden root, even if the dialog raised,
        # so no orphaned Tk window is left behind in the kernel.
        root.destroy()
    return Path(selection) if as_path else str(selection)

def get_directory(title: str, as_path = False):
    """Ask the user to pick a directory through a Tk "choose directory" dialog.

    title: text shown in the dialog title bar.
    as_path: if truthy, return the selection as a pathlib.Path; otherwise as str.

    Returns the selected directory path.
    NOTE(review): tkinter documents an empty string for a cancelled dialog;
    with as_path=True that becomes Path(''), i.e. Path('.') - confirm callers cope.
    """
    root = initiate_tk_dialog()
    try:
        selection = filedialog.askdirectory(title=title)
    finally:
        # Always tear down the hidden root, even if the dialog raised,
        # so no orphaned Tk window is left behind in the kernel.
        root.destroy()
    return Path(selection) if as_path else str(selection)

In [133]:
def get_directory_files(file_extension: str):
    """Ask for a directory and list the files in it that end with file_extension.

    file_extension: str, suffix a file name must end with (case-sensitive), e.g. '.tiff'.

    Returns (directory, files): the chosen directory as a string and a list of
    '/'-joined paths to the matching regular files, sorted by file name so the
    result is the same on every run.

    Raises FileNotFoundError if the dialog returned no directory or the
    directory cannot be listed.
    """
    directory = get_directory('Select Images Directory', as_path=False)
    if not directory:
        # An empty selection would otherwise surface as an opaque listdir('') error.
        raise FileNotFoundError('No directory selected')
    # os.listdir yields entries in arbitrary order; sort for reproducible output.
    # Paths are built with '/' (not os.sep) to match the directory string format.
    files = [
        '/'.join([directory, name])
        for name in sorted(listdir(directory))
        if name.endswith(file_extension) and isfile(join(directory, name))
    ]
    return directory, files

In [127]:
def process_img(img_path: str, gray_scale = False):
    """Open the image at img_path, optionally convert it to grayscale, and invert it.

    img_path: absolute path to image file.
    gray_scale: Boolean, if True the image is converted to mode "L" before inversion.

    Returns the inverted image as a new PIL image.
    NOTE(review): ImageOps.invert only accepts some image modes (e.g. "L", "RGB");
    other modes are expected to raise - confirm the input images' modes.
    """
    # Image.open is lazy and keeps the file handle open; the context manager
    # releases it once the inverted copy has been produced.
    with PIL.Image.open(img_path) as source:
        img = source.convert("L") if gray_scale else source
        return ImageOps.invert(img)

In [177]:
def get_number_from_img(img, timeout = 15):
    """Read a number from an image with Tesseract OCR and return it as a float.

    img: processed image passed to pytesseract.
    timeout: pytesseract timeout value - default = 15.

    Only the text after the last '.' in the OCR output is kept and re-prefixed
    with '.', so any integer part is discarded. Returns np.nan when that text
    cannot be parsed as a float (e.g. the OCR output is empty).
    Errors raised by the OCR call itself are not caught here.
    """
    # Single text line (--psm 7), restricted to digits and the decimal point.
    ocr_text = pytesseract.image_to_string(img,
                                           lang='eng',
                                           timeout=timeout,
                                           config='--psm 7 -c tessedit_char_whitelist=0123456789.')
    try:
        # float() ignores surrounding whitespace such as the trailing newline.
        return float('.' + ocr_text.split('.')[-1])
    except ValueError:
        # Catch only parse failures, so interrupts and real bugs still surface.
        return np.nan

In [183]:
def get_frame_number(file):
    """Extract the integer frame number encoded in a file path.

    file: file absolute path, '/'-separated, whose name contains '_frame_<number>'.

    Returns the text between the last '_frame_' marker and the next '.' in the
    file name, converted to int. Raises ValueError if that text is not an integer.
    """
    # Last path component (the whole string when there is no '/').
    file_name = file.rpartition('/')[2]
    # Everything after the final '_frame_' marker, e.g. '12.tiff'.
    after_marker = file_name.split('_frame_')[-1]
    # Drop the extension: keep what precedes the first '.'.
    digits = after_marker.partition('.')[0]
    return int(digits)

In [184]:
def get_number_list(files: list):
    """Run OCR on every image file and collect the extracted numbers.

    files: list of image files absolute paths.

    Returns a list with one entry per file, in the same order as `files`:
    the value get_number_from_img returns for the image produced by process_img.
    """
    return [get_number_from_img(process_img(path)) for path in files]

def get_frame_list(files: list):
    """Collect the frame number of every image file.

    files: list of image files absolute paths.

    Returns a list with one entry per file, in the same order as `files`:
    the value get_frame_number returns for that path.
    """
    return [get_frame_number(path) for path in files]

In [134]:
# Get directory and file paths: opens a directory-picker dialog, then collects
# the paths of the files in the chosen directory whose names end with '.tiff'.
directory, files = get_directory_files(file_extension='.tiff')

In [None]:
# Get number and frame number lists. Both are built by iterating `files` in order,
# so number_list[i] and frame_number_list[i] refer to the same image.
# number_list holds the OCR result per image (NaN where the text could not be parsed).
number_list = get_number_list(files)
frame_number_list = get_frame_list(files)