Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: SphericalKat <amolele@gmail.com>
- Loading branch information
1 parent
e6783f3
commit 29e345e
Showing
9 changed files
with
184 additions
and
19 deletions.
There are no files selected for viewing
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,43 @@ | ||
import 'package:fuzzywuzzy/algorithms/weighted_ratio.dart'; | ||
import 'package:fuzzywuzzy/extractor.dart'; | ||
import 'package:fuzzywuzzy/fuzzywuzzy.dart'; | ||
|
||
/// Prints the score produced by each public matcher for a sample input.
void main() {
  // Simple ratio
  print(ratio('mysmilarstring', 'myawfullysimilarstirng'));
  print(ratio('mysmilarstring', 'mysimilarstring'));

  // Partial ratio
  print(partialRatio('similar', 'somewhresimlrbetweenthisstring'));

  // Token sort ratio
  print(tokenSortPartialRatio('order words out of', 'words out of order'));
  print(tokenSortRatio('order words out of', 'words out of order'));

  // Token set ratio
  print(tokenSetRatio('fuzzy was a bear', 'fuzzy fuzzy fuzzy bear'));
  print(tokenSetPartialRatio('fuzzy was a bear', 'fuzzy fuzzy fuzzy bear'));

  // Weighted ratio
  print(weightedRatio('The quick brown fox jimps ofver the small lazy dog',
      'the quick brown fox jumps over the small lazy dog'));

  // Extracting the top 4 choices that score at least 50 (cutoff is inclusive)
  print(
    extractTop(
      query: 'goolge',
      choices: [
        'google',
        'bing',
        'facebook',
        'linkedin',
        'twitter',
        'googleplus',
        'bingnews',
        'plexoogl',
      ],
      limit: 4,
      cutoff: 50,
    ),
  );
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,76 @@ | ||
import 'package:fuzzywuzzy/applicable.dart'; | ||
import 'package:fuzzywuzzy/model/extracted_result.dart'; | ||
import 'package:collection/collection.dart'; | ||
|
||
/// Extracts the best-matching entries from a list of candidate strings.
///
/// Every extraction method drops candidates whose score is below the cutoff
/// supplied at construction time.
class Extractor {
  /// Minimum score (inclusive) a choice must reach to be reported.
  final int _cutoff;

  /// Creates an extractor that rejects scores below [_cutoff] (default 0).
  Extractor([this._cutoff = 0]);

  /// Scores every entry of [choices] against [query] using [func].
  ///
  /// Returns one [ExtractedResult] per choice that scores at least the cutoff,
  /// in the same order as [choices]. Each result carries the position of the
  /// choice in the original list.
  List<ExtractedResult> extractWithoutOrder(
      String query, List<String> choices, Applicable func) {
    var yields = <ExtractedResult>[];
    var index = 0;

    for (var s in choices) {
      var score = func.apply(query, s);

      if (score >= _cutoff) {
        yields.add(ExtractedResult(s, score, index));
      }
      // The index advances for rejected choices too, so that it always
      // refers to the position in the caller's list.
      index++;
    }

    return yields;
  }

  /// Returns the single highest-scoring choice at or above the cutoff.
  ///
  /// When several choices share the highest score, the one appearing last in
  /// [choices] is returned.
  ///
  /// Throws a [StateError] if no choice reaches the cutoff (which includes the
  /// case of an empty [choices] list).
  ExtractedResult extractOne(
      String query, List<String> choices, Applicable func) {
    var extracted = extractWithoutOrder(query, choices, func);

    // `reduce` would throw a bare "No element" StateError here; raise the same
    // error type with a message that explains the cause.
    if (extracted.isEmpty) {
      throw StateError('No choice scored at or above the cutoff of $_cutoff');
    }

    return extracted.reduce(
        (value, element) => value.score > element.score ? value : element);
  }

  /// Returns every choice at or above the cutoff, ordered from the most
  /// similar to the least similar.
  List<ExtractedResult> extractSorted(
      String query, List<String> choices, Applicable func) {
    // Sort ascending by score (natural order of ExtractedResult), then flip.
    var best = extractWithoutOrder(query, choices, func)..sort();
    return best.reversed.toList();
  }

  /// Returns at most [limit] of the highest-scoring choices at or above the
  /// cutoff, ordered from the most similar to the least similar.
  ///
  /// A [limit] of zero or less yields an empty list.
  List<ExtractedResult> extractTop(
      String query, List<String> choices, Applicable func, int limit) {
    var best = extractWithoutOrder(query, choices, func);
    var results = _findTopKHeap(best, limit);
    return results.reversed.toList();
  }

  /// Selects the [k] largest entries of [arr] with a bounded min-heap.
  ///
  /// The returned list is in ascending order (smallest of the kept entries
  /// first) and has `min(k, arr.length)` elements; it is empty when `k <= 0`.
  List<ExtractedResult> _findTopKHeap(List<ExtractedResult> arr, int k) {
    // Without this guard the loop below would read `pq.first` on an empty
    // queue and throw for any non-empty input.
    if (k <= 0) {
      return <ExtractedResult>[];
    }

    var pq = PriorityQueue<ExtractedResult>();

    for (var x in arr) {
      if (pq.length < k) {
        pq.add(x);
      } else if (x.compareTo(pq.first) > 0) {
        // `x` beats the weakest entry currently kept, so it takes its place.
        pq.removeFirst();
        pq.add(x);
      }
    }

    // The queue never holds more than k entries; drain it completely.
    var res = <ExtractedResult>[];
    while (pq.isNotEmpty) {
      res.add(pq.removeFirst());
    }
    return res;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,40 +1,86 @@ | ||
import 'package:fuzzywuzzy/applicable.dart'; | ||
import 'package:fuzzywuzzy/extractor.dart'; | ||
import 'package:fuzzywuzzy/model/extracted_result.dart'; | ||
|
||
import 'algorithms/token_set.dart'; | ||
import 'algorithms/token_sort.dart'; | ||
import 'algorithms/weighted_ratio.dart'; | ||
import 'ratios/partial_ratio.dart'; | ||
import 'ratios/simple_ratio.dart'; | ||
|
||
/// Returns the simple Levenshtein ratio of [s1] and [s2].
///
/// The result is a measure of how similar the two strings are.
int ratio(String s1, String s2) => SimpleRatio().apply(s1, s2);
|
||
/// Returns the partial ratio of [s1] and [s2].
///
/// Uses the "best partial" heuristic, meant for two strings of noticeably
/// different lengths, where inconsistent substrings would otherwise lead to
/// problems in matching.
int partialRatio(String s1, String s2) => PartialRatio().apply(s1, s2);
|
||
/// Returns the simple ratio of [s1] and [s2] after token sorting.
///
/// The alphanumeric tokens of each string are sorted and re-joined before the
/// two resulting strings are compared.
int tokenSortRatio(String s1, String s2) =>
    TokenSort().apply(s1, s2, SimpleRatio());
|
||
/// Returns the partial ratio of [s1] and [s2] after token sorting.
///
/// The alphanumeric tokens of each string are sorted and re-joined before the
/// two resulting strings are compared.
int tokenSortPartialRatio(String s1, String s2) =>
    TokenSort().apply(s1, s2, PartialRatio());
|
||
/// Returns the token-set similarity of [s1] and [s2] using the simple ratio.
///
/// Both strings are split into tokens; the intersection and the remainders of
/// the two token sets are used to build comparison strings, which are then
/// compared. Useful for strings in which words appear redundantly.
int tokenSetRatio(String s1, String s2) =>
    TokenSet().apply(s1, s2, SimpleRatio());
|
||
/// Returns the token-set similarity of [s1] and [s2] using the partial ratio.
///
/// Both strings are split into tokens; the intersection and the remainders of
/// the two token sets are used to build comparison strings, which are then
/// compared. Useful for strings in which words appear redundantly.
int tokenSetPartialRatio(String s1, String s2) =>
    TokenSet().apply(s1, s2, PartialRatio());
|
||
/// Returns a weighted ratio between [s1] and [s2], using the best option from
/// the other fuzzy matching algorithms in this library.
///
/// Both inputs are lower-cased before they are compared.
///
/// Example:
/// ```dart
/// weightedRatio('The quick brown fox jimps ofver the small lazy dog', 'the quick brown fox jumps over the small lazy dog') // 97
/// ```
int weightedRatio(String s1, String s2) {
  final first = s1.toLowerCase();
  final second = s2.toLowerCase();
  return WeightedRatio().apply(first, second);
}
|
||
/// Returns the [limit] most similar entries of [choices] for [query], sorted
/// from the most similar to the least similar.
///
/// Entries scoring below [cutoff] (default 0) are rejected. Scores are
/// computed with [ratio], which defaults to [WeightedRatio].
List<ExtractedResult> extractTop(
        {required String query,
        required List<String> choices,
        required int limit,
        int cutoff = 0,
        Applicable ratio = const WeightedRatio()}) =>
    Extractor(cutoff).extractTop(query, choices, ratio, limit);
|
||
/// Returns all entries of [choices] that match [query], sorted from the most
/// similar to the least similar.
///
/// Entries scoring below [cutoff] are rejected; with the default [cutoff] of 0
/// nothing is filtered out unless a score is negative. Scores are computed
/// with [ratio], which defaults to [WeightedRatio].
List<ExtractedResult> extractSorted(
    {required String query,
    required List<String> choices,
    int cutoff = 0,
    Applicable ratio = const WeightedRatio()}) {
  final extractor = Extractor(cutoff);
  return extractor.extractSorted(query, choices, ratio);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
/// A single match produced by an extraction: the matched text, its score and
/// where it was found.
///
/// Results order themselves by [score] alone, lowest first.
class ExtractedResult implements Comparable<ExtractedResult> {
  /// The choice that was scored.
  final String string;

  /// The similarity score assigned to [string].
  final int score;

  /// The position of [string] in the list it was extracted from.
  final int index;

  /// Creates a result for [string] with the given [score] and [index].
  ExtractedResult(this.string, this.score, this.index);

  /// Compares this result with [other] by [score] only.
  @override
  int compareTo(ExtractedResult other) => score.compareTo(other.score);

  @override
  String toString() => '(string $string, score: $score, index: $index)';
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters