Skip to content

Commit

Permalink
Implement baked predictive text
Browse files Browse the repository at this point in the history
  • Loading branch information
alex-the-man committed Dec 24, 2021
1 parent 594a7c8 commit 9125c5e
Show file tree
Hide file tree
Showing 26 changed files with 115,952 additions and 14 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,6 @@ fastlane/report.xml
fastlane/Preview.html
fastlane/screenshots
fastlane/test_output

NGramBuilder/zh_HK.ngram

131 changes: 123 additions & 8 deletions Cantoboard.xcodeproj/project.pbxproj

Large diffs are not rendered by default.

79 changes: 79 additions & 0 deletions Cantoboard.xcodeproj/xcshareddata/xcschemes/NGramBuilder.xcscheme
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "1320"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "7904A1D82771481200963CAB"
BuildableName = "NGramBuilder"
BlueprintName = "NGramBuilder"
ReferencedContainer = "container:Cantoboard.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "YES"
customWorkingDirectory = "${PROJECT_DIR}/NGramBuilder"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "7904A1D82771481200963CAB"
BuildableName = "NGramBuilder"
BlueprintName = "NGramBuilder"
ReferencedContainer = "container:Cantoboard.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "7904A1D82771481200963CAB"
BuildableName = "NGramBuilder"
BlueprintName = "NGramBuilder"
ReferencedContainer = "container:Cantoboard.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>
Binary file not shown.
28 changes: 26 additions & 2 deletions CantoboardFramework/Keyboard/CandidateOrganizer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ protocol CandidateSource: AnyObject {
func getSectionHeader(section: Int) -> String?
var supportedGroupByModes: [GroupByMode] { get }
var groupByMode: GroupByMode { get set }
var isStatic: Bool { get }
}

extension CandidateSource {
Expand All @@ -51,6 +52,8 @@ class InputEngineCandidateSource: CandidateSource {
private weak var inputController: InputController?
private var _groupByMode = GroupByMode.byFrequency

var isStatic: Bool { false }

init(inputController: InputController) {
self.inputController = inputController
}
Expand Down Expand Up @@ -386,6 +389,7 @@ class InputEngineCandidateSource: CandidateSource {
class AutoSuggestionCandidateSource: CandidateSource {
private let candidates: [String]
let cannotExpand: Bool
var isStatic: Bool { true }

init(_ candidates: [String], cannotExpand: Bool = false) {
self.candidates = candidates
Expand Down Expand Up @@ -446,6 +450,13 @@ enum AutoSuggestionType {

// This class filters, groups, and sorts the candidates.
class CandidateOrganizer {
private static let predictiveTextEngine: PredictiveTextEngine = initPredictiveTextEngine()

/// Creates the predictive text engine backed by the `zh_HK.ngram` file located in
/// `DataFileManager.builtInNGramDictDirectory`.
private static func initPredictiveTextEngine() -> PredictiveTextEngine {
    PredictiveTextEngine(DataFileManager.builtInNGramDictDirectory + "/zh_HK.ngram")
}

private static let halfWidthPunctuationCandidateSource = AutoSuggestionCandidateSource([".", ",", "?", "!", "", "", "", ""], cannotExpand: true)
private static let fullWidthPunctuationCandidateSource = AutoSuggestionCandidateSource(["", "", "", "", "", ".", ",", "?", "!"], cannotExpand: true)
private static let halfWidthDigitCandidateSource = AutoSuggestionCandidateSource(["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"])
Expand All @@ -454,7 +465,7 @@ class CandidateOrganizer {
private static let fullWidthUpperDigitCandidateSource = AutoSuggestionCandidateSource(["", "", "", "", "", "", "", "", "", "", "", "", "", "", ""])
private static let emailCandidateSource = AutoSuggestionCandidateSource(["gmail.com", "outlook.com", "icloud.com", "yahoo.com", "hotmail.com"])
private static let domainCandidateSource = AutoSuggestionCandidateSource(["com", "org", "edu", "net", SessionState.main.localDomain, "hk", "tw", "mo", "cn", "uk", "jp"].unique())

/// Keys by which candidates can be grouped.
enum GroupBy {
    case frequency
    case radical
    case stroke
    case tone
}
Expand All @@ -463,6 +474,7 @@ class CandidateOrganizer {
var onReloadCandidates: ((CandidateOrganizer) -> Void)?
var candidateSource: CandidateSource?
var autoSuggestionType: AutoSuggestionType?
var suggestionContextualText: String = ""

weak var inputController: InputController?

Expand All @@ -472,7 +484,9 @@ class CandidateOrganizer {

/// Requests another batch of candidates for the given section.
///
/// Nothing happens unless all of the following hold:
/// - `section` is 0;
/// - the input engine has not reported that Rime loaded all candidates;
/// - the current candidate source is not static (a static source has nothing more to fetch).
///
/// - Parameter section: Index of the section asking for more candidates.
func requestMoreCandidates(section: Int) {
    guard section == 0,
          !(inputController?.inputEngine.hasRimeLoadedAllCandidates ?? false),
          // A missing candidate source is treated as non-static, as before.
          !(candidateSource?.isStatic ?? false) else { return }
    // Update exactly once; an unconditional update here would bypass the static-source check.
    updateCandidates(reload: false)
}

func updateCandidates(reload: Bool, targetCandidatesCount: Int = 0) {
Expand All @@ -490,6 +504,16 @@ class CandidateOrganizer {
case .email: candidateSource = Self.emailCandidateSource
case .domain: candidateSource = Self.domainCandidateSource
}

if !suggestionContextualText.isEmpty &&
(autoSuggestionType == .halfWidthPunctuation ||
autoSuggestionType == .fullWidthPunctuation) {
let predictiveCandidates = Self.predictiveTextEngine.predict(suggestionContextualText) as NSArray as? [String]
if let predictiveCandidates = predictiveCandidates, !predictiveCandidates.isEmpty {
DDLogInfo("Predictive text: \(suggestionContextualText) \(predictiveCandidates)")
candidateSource = AutoSuggestionCandidateSource(predictiveCandidates)
}
}
} else {
candidateSource = nil
}
Expand Down
1 change: 1 addition & 0 deletions CantoboardFramework/Keyboard/InputController.swift
Original file line number Diff line number Diff line change
Expand Up @@ -916,6 +916,7 @@ class InputController: NSObject {

defer {
candidateOrganizer.autoSuggestionType = newAutoSuggestionType
candidateOrganizer.suggestionContextualText = textBeforeInput
}

guard let lastCharBefore = textBeforeInput.last else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,15 @@ class CandidateCollectionViewFlowLayout: UICollectionViewFlowLayout {
let section = headerAttributes.indexPath.section
guard section < collectionView.numberOfSections else { return }

var headerSize = CGSize(width: candidatePaneView.sectionHeaderWidth, height: rowHeight)
var headerSizeInBytes = CGSize(width: candidatePaneView.sectionHeaderWidth, height: rowHeight)
let numOfItemsInSection = collectionView.numberOfItems(inSection: section)
var origin = headerAttributes.frame.origin
if numOfItemsInSection > 0,
let rectOfLastItemInSection = layoutAttributesForItem(at: [section, numOfItemsInSection - 1]) {
origin.y = min(origin.y, rectOfLastItemInSection.frame.maxY - headerSize.height)
origin.y = min(origin.y, rectOfLastItemInSection.frame.maxY - headerSizeInBytes.height)
// Expand the header to cover the whole section vertically.
headerSize.height = rectOfLastItemInSection.frame.maxY - origin.y
headerSizeInBytes.height = rectOfLastItemInSection.frame.maxY - origin.y
}
headerAttributes.frame = CGRect(origin: origin, size: headerSize)
headerAttributes.frame = CGRect(origin: origin, size: headerSizeInBytes)
}
}
1 change: 1 addition & 0 deletions CantoboardFramework/Utils/DataFileManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class DataFileManager {
static let logsDirectory = "\(cacheDirectory)/Logs"
static let builtInEnglishDictDirectory = "\(cacheDataDirectory)/EnglishDict"
static let builtInUnihanDictDirectory = "\(cacheDataDirectory)/Unihan"
static let builtInNGramDictDirectory = "\(cacheDataDirectory)/NGram"
static let versionFilePath = "\(cacheDataDirectory)/version"

static let documentDirectory = getDocumentDirectoryPath()
Expand Down
28 changes: 28 additions & 0 deletions CantoboardFramework/Utils/NGram.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#ifndef NGRAM_H_
#define NGRAM_H_

// size_t is used by the structures below. Include it here (outside the packed
// region) so this header compiles no matter what the including file pulled in first.
#include <stddef.h>

// Pack the structures to 1-byte alignment so no padding is inserted between fields.
#pragma pack(push,1)

// Identifiers of the sections of an n-gram file.
// NOTE(review): presumably used as indices into NGramHeader::sections - confirm against the reader/builder.
enum NGramSectionId {
    weight = 0,
    trie = 1
};

// Size and offset of one section's data.
// NOTE(review): the fields are size_t, so their width follows the platform; files are
// only interchangeable between builds that agree on sizeof(size_t).
struct NGramSectionHeader {
    size_t dataSizeInBytes;
    size_t dataOffset;
};

// Header of an n-gram file.
struct NGramHeader {
    // Magic bytes identifying the file. Existing files carry exactly these 8 bytes,
    // so the spelling must not be changed.
    const char magicHeader[8] = {'C', 'A', 'N', 'T', 'N', 'G', 'A', 'M'};
    // Size of this header structure, in bytes.
    short headerSizeInBytes = sizeof(NGramHeader);
    short version = 0;
    char maxN;
    size_t numOfEntries;
    // One entry per NGramSectionId value.
    NGramSectionHeader sections[2];
};
#pragma pack(pop)

// Half-precision floating point type (compiler extension) used for weights.
typedef __fp16 Weight;

#endif // NGRAM_H_
Loading

0 comments on commit 9125c5e

Please sign in to comment.