Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use text instead of OCR to get live transcribe content #9

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,39 +15,35 @@
package com.livescrolltranscript

import android.accessibilityservice.AccessibilityService
import android.graphics.Rect
import android.os.Build
import android.util.Log
import android.view.accessibility.AccessibilityEvent
import android.view.accessibility.AccessibilityNodeInfo
import android.view.accessibility.AccessibilityNodeInfo.AccessibilityAction.ACTION_SELECT
import android.view.accessibility.AccessibilityNodeInfo.AccessibilityAction.ACTION_SHOW_ON_SCREEN
import android.view.Display
import android.widget.Toast
import androidx.annotation.RequiresApi

/**
* An [AccessibilityService] for scrolling on-screen text to match recently-played audio.
*
* When the Live Captions view scrolls, this service receives an [AccessibilityEvent]. It then grabs
* the Live Caption window node to read the current Live Caption text. The current accessibility tree
* is searched for the current caption text. If a unique [AccessibilityNodeInfo] is found to match
* the caption text, it is requested to show itself on screen.
*/
class LiveScrollTranscriptAccessibilityService : AccessibilityService() {
private val tag = "LiveScrollTranscriptAccessibilityService"
private val liveCaptionPackageName = "com.google.android.as"
private val liveCaptionViewLocation = Rect()
private val whitespaceRegex = Regex("\\s+")
private val tryRefresh =
"Live Scroll Transcript found matching text but failed to scroll. Try reloading the page."

@RequiresApi(Build.VERSION_CODES.R)
private val ocrProcessor =
OcrProcessor(liveCaptionViewLocation, ::scrollToText, this)

// Number of Live Caption view scrolls that should happen before we search for new caption text.
private val captionViewScrollsThreshold: Int = 2

private val numWordsToLookAt: Int = 10

// Number of Live Caption view scrolls that have happened since last search for caption text.
private var numCaptionViewScrolls: Int = captionViewScrollsThreshold

Expand All @@ -63,8 +59,7 @@ class LiveScrollTranscriptAccessibilityService : AccessibilityService() {
++numCaptionViewScrolls >= captionViewScrollsThreshold
) {
numCaptionViewScrolls = 0
event?.source?.getBoundsInScreen(liveCaptionViewLocation) // TODO: Lock Rect during OCR.
takeScreenshot(Display.DEFAULT_DISPLAY, applicationContext.mainExecutor, ocrProcessor)
scrollToText(event.source?.getChild(0)?.text.toString())
}
}

Expand All @@ -75,19 +70,21 @@ class LiveScrollTranscriptAccessibilityService : AccessibilityService() {
*/
private fun scrollToText(textToFind: String) {
    Log.d(tag, "textToFind: $textToFind")

    // Splitting on whitespace yields empty tokens when the text is blank or has leading/trailing
    // whitespace; drop them so an empty string can never be picked as the keyword.
    val wordsToFind = textToFind.split(whitespaceRegex).filter(String::isNotEmpty)
    if (wordsToFind.isEmpty()) {
        Log.d(tag, "No words to search for; skipping scroll.")
        return
    }

    // Only the last numWordsToLookAt words take part in the search.
    val wordsToFindTruncated = wordsToFind.takeLast(numWordsToLookAt)
    // keywordIndex is relative to wordsToFindTruncated, not to the full word list.
    val keywordIndex = wordsToFindTruncated.longestWordIndex()
    val keyword = wordsToFindTruncated[keywordIndex]
    Log.d(tag, "wordsToFind: $wordsToFindTruncated")
    Log.d(tag, "keyword: $keyword")

    // Collect every node under the active window's root that contains the keyword.
    val nodesContainingKeyword = mutableSetOf<AccessibilityNodeInfo>()
    getNodesContainingWord(keyword, this.rootInActiveWindow, nodesContainingKeyword)

    // The original format string had no placeholder, so the size was silently dropped.
    Log.d(tag, "nodesContainingKeyword.size: ${nodesContainingKeyword.size}")
    Log.v(tag, nodesContainingKeyword.toString())

    // Narrow the candidates using the words around the keyword, then try to scroll.
    narrowDownNodesContainingKeyword(nodesContainingKeyword, keywordIndex, wordsToFindTruncated)
    attemptScroll(nodesContainingKeyword)
    nodesContainingKeyword.forEach(AccessibilityNodeInfo::recycle)
}
Expand Down

This file was deleted.