diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e925614 --- /dev/null +++ b/.gitignore @@ -0,0 +1,277 @@ + +# Created by https://www.toptal.com/developers/gitignore/api/python,pycharm,opencv +# Edit at https://www.toptal.com/developers/gitignore?templates=python,pycharm,opencv + +### OpenCV ### +#OpenCV for Mac and Linux +#build and release folders +*/CMakeFiles +*/CMakeCache.txt +*/Makefile +*/cmake_install.cmake +.DS_Store + +### PyCharm ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### PyCharm Patch ### +# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 + +# *.iml +# modules.xml +# .idea/misc.xml +# *.ipr + +# Sonarlint plugin +# https://plugins.jetbrains.com/plugin/7973-sonarlint +.idea/**/sonarlint/ + +# SonarQube Plugin +# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin +.idea/**/sonarIssues.xml + +# Markdown Navigator plugin +# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced +.idea/**/markdown-navigator.xml +.idea/**/markdown-navigator-enh.xml +.idea/**/markdown-navigator/ + +# Cache file creation bug +# See https://youtrack.jetbrains.com/issue/JBR-2257 +.idea/$CACHE_FILE$ + +# CodeStream plugin +# https://plugins.jetbrains.com/plugin/12206-codestream +.idea/codestream.xml + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller 
builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintainted in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# End of https://www.toptal.com/developers/gitignore/api/python,pycharm,opencv diff --git a/.idea/workspace.xml b/.idea/workspace.xml deleted file mode 100644 index b2c9fcd..0000000 --- a/.idea/workspace.xml +++ /dev/null @@ -1,344 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1581826543105 - - - - - - - - - - - - - - - - 147 - - - - 134 - - - - 135 - - - - 136 - - - file://$PROJECT_DIR$/detect_text_east/lib_east/eval.py - 263 - - - file://$PROJECT_DIR$/result_processing/eval_classes.py - 92 - - - file://$PROJECT_DIR$/detect_text_east/lib_east/eval.py - 108 - - - file://$PROJECT_DIR$/cnn/CNN.py - 62 - - - file://$PROJECT_DIR$/run_single.py - 27 - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/README.md b/README.md index 8c0b489..64265f7 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ UIED comprises two parts to detect UI text and graphic elements, such as button, * **Python 3.5** * 
**Opencv 3.4.2** * **Pandas** +* **Google Cloud Vision** ([link](https://cloud.google.com/vision/docs/libraries)) @@ -41,7 +42,7 @@ UIED comprises two parts to detect UI text and graphic elements, such as button, The new version of UIED equipped with Google OCR is easy to deploy and no pre-trained model is needed. Simply donwload the repo along with the dependencies. -> Please replace the Google OCR key at `detect_text/ocr.py line 28` with your own (apply in [Google website](https://cloud.google.com/vision)). +> Please either export GOOGLE_APPLICATION_CREDENTIALS in your environment or provide the path to the private key file for Google Cloud Vision at `detect_text/ocr.py line 13` (apply in [Google website](https://cloud.google.com/vision), then follow the [Setup](https://cloud.google.com/vision/docs/setup#auth-env)). ### Usage To test your own image(s): diff --git a/config/__pycache__/CONFIG_UIED.cpython-37.pyc b/config/__pycache__/CONFIG_UIED.cpython-37.pyc deleted file mode 100644 index ee8516a..0000000 Binary files a/config/__pycache__/CONFIG_UIED.cpython-37.pyc and /dev/null differ diff --git a/detect_text/ocr.py b/detect_text/ocr.py index 2149ebd..ab68c70 100644 --- a/detect_text/ocr.py +++ b/detect_text/ocr.py @@ -1,40 +1,31 @@ import cv2 import os -import requests +import io +from google.cloud import vision import json from base64 import b64encode import time -def Google_OCR_makeImageData(imgpath): - with open(imgpath, 'rb') as f: - ctxt = b64encode(f.read()).decode() - img_req = { - 'image': { - 'content': ctxt - }, - 'features': [{ - 'type': 'DOCUMENT_TEXT_DETECTION', - # 'type': 'TEXT_DETECTION', - 'maxResults': 1 - }] - } - return json.dumps({"requests": img_req}).encode() - - def ocr_detection_google(imgpath): - start = time.clock() + start = time.perf_counter() # time.clock() was removed in Python 3.8; perf_counter() is the wall-clock replacement - url = 'https://vision.googleapis.com/v1/images:annotate' - api_key = 'AIzaSyDUc4iOUASJQYkVwSomIArTKhE2C6bHK8U' # *** Replace with your own Key *** - imgdata = Google_OCR_makeImageData(imgpath) - response = 
requests.post(url, - data=imgdata, - params={'key': api_key}, - headers={'Content_Type': 'application/json'}) - # print('*** Text Detection Time Taken:%.3fs ***' % (time.clock() - start)) - print("*** Please replace the Google OCR key at detect_text/ocr.py line 28 with your own (apply in https://cloud.google.com/vision) ***") - if response.json()['responses'] == [{}]: + if os.getenv('GOOGLE_APPLICATION_CREDENTIALS') is None: + KEY_PATH = "" # Set the path to the private key file created in https://cloud.google.com/vision/docs/setup#sa-create + os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = KEY_PATH + try: + client = vision.ImageAnnotatorClient() + except Exception as e: + print("*** Please export GOOGLE_APPLICATION_CREDENTIALS to the environment (apply in https://cloud.google.com/vision) ***") + print(f"Exception {e}") + return None + with io.open(imgpath, 'rb') as image_file: + content = image_file.read() + image = vision.Image(content=content) + response = client.document_text_detection(image=image) + print('*** Text Detection Time Taken:%.3fs ***' % (time.perf_counter() - start)) + if not response.text_annotations: # No Text return None else: - return response.json()['responses'][0]['textAnnotations'][1:] + return response.text_annotations[1:] diff --git a/detect_text/text_detection.py b/detect_text/text_detection.py index 632704f..15f000c 100644 --- a/detect_text/text_detection.py +++ b/detect_text/text_detection.py @@ -93,14 +93,11 @@ def text_cvt_orc_format(ocr_result): error = False x_coordinates = [] y_coordinates = [] - text_location = result['boundingPoly']['vertices'] - content = result['description'] + text_location = result.bounding_poly.vertices + content = result.description for loc in text_location: - if 'x' not in loc or 'y' not in loc: - error = True - break - x_coordinates.append(loc['x']) - y_coordinates.append(loc['y']) + x_coordinates.append(loc.x) + y_coordinates.append(loc.y) if error: continue location = {'left': min(x_coordinates), 'top': 
min(y_coordinates), 'right': max(x_coordinates), 'bottom': max(y_coordinates)} diff --git a/run_single.py b/run_single.py index a22a50f..b1f3e31 100644 --- a/run_single.py +++ b/run_single.py @@ -2,6 +2,7 @@ import cv2 import os import numpy as np +import argparse def resize_height_by_longest_edge(img_path, resize_length=800): @@ -47,15 +48,23 @@ def color_tips(): mobile: {'min-grad':4, 'ffl-block':5, 'min-ele-area':50, 'max-word-inline-gap':6, 'max-line-gap':1} web : {'min-grad':3, 'ffl-block':5, 'min-ele-area':25, 'max-word-inline-gap':4, 'max-line-gap':4} ''' - key_params = {'min-grad':10, 'ffl-block':5, 'min-ele-area':50, - 'merge-contained-ele':True, 'merge-line-to-paragraph':False, 'remove-bar':True} + parser = argparse.ArgumentParser() + parser.add_argument('--input', type=str, required=True, help="The input path of the screenshot") + parser.add_argument('--show', action='store_true', help="Show the annotation stages") + args = parser.parse_args() + + key_params = {'min-grad': 4, 'ffl-block': 5, 'min-ele-area': 50, + 'merge-contained-ele': True, 'merge-line-to-paragraph': True, 'remove-bar': True} # set input image path - input_path_img = 'data/input/497.jpg' + # input_path_img = 'data/input/497.jpg' + show = args.show + input_path_img = args.input output_root = 'data/output' resized_height = resize_height_by_longest_edge(input_path_img, resize_length=800) - color_tips() + if show: + color_tips() is_ip = True is_clf = False @@ -64,28 +73,33 @@ def color_tips(): if is_ocr: import detect_text.text_detection as text + os.makedirs(pjoin(output_root, 'ocr'), exist_ok=True) - text.text_detection(input_path_img, output_root, show=True, method='google') + text.text_detection(input_path_img, output_root, show=show, method='google') if is_ip: import detect_compo.ip_region_proposal as ip + os.makedirs(pjoin(output_root, 'ip'), exist_ok=True) # switch of the classification func classifier = None if is_clf: classifier = {} from cnn.CNN import CNN + # classifier['Image'] 
= CNN('Image') classifier['Elements'] = CNN('Elements') # classifier['Noise'] = CNN('Noise') ip.compo_detection(input_path_img, output_root, key_params, - classifier=classifier, resize_by_height=resized_height, show=False) + classifier=classifier, resize_by_height=resized_height, show=show) if is_merge: import detect_merge.merge as merge + os.makedirs(pjoin(output_root, 'merge'), exist_ok=True) name = input_path_img.split('/')[-1][:-4] compo_path = pjoin(output_root, 'ip', str(name) + '.json') ocr_path = pjoin(output_root, 'ocr', str(name) + '.json') merge.merge(input_path_img, compo_path, ocr_path, pjoin(output_root, 'merge'), - is_remove_bar=key_params['remove-bar'], is_paragraph=key_params['merge-line-to-paragraph'], show=True) + is_remove_bar=key_params['remove-bar'], is_paragraph=key_params['merge-line-to-paragraph'], + show=show)