Skip to content

Commit

Permalink
version: 0.0.1-beta.7
Browse files Browse the repository at this point in the history
  • Loading branch information
GerhardMalan committed Nov 22, 2022
1 parent 8cdedd7 commit 6d8b137
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 11 deletions.
8 changes: 4 additions & 4 deletions lib/src/query_parser/query_parser.dart
Expand Up @@ -106,11 +106,11 @@ abstract class QueryParserMixin implements QueryParser {
docIndex.keywordPostings
.removeWhere((key, value) => !dFtMap.keys.contains(key));
docIndex.postings.removeWhere((key, value) => !dFtMap.keys.contains(key));
// - get the weighted document term freqeuncies from the postings
// - get the weighted document term frequencies from the postings
final weightedDtf =
InvertedIndex.docTermFrequencies(docIndex.postings, documentZones);
// - retrieve the corpus size from the index
final n = docIndex.postings.length;
final n = await index.vocabularyLength;
// - get the inverse term document frequencies for the terms
final idfMap = dFtMap.idFtMap(n);
// - get a tf-idft map for the weighted document term frequencies
Expand Down Expand Up @@ -180,9 +180,9 @@ abstract class QueryParserMixin implements QueryParser {
final normalized = v / max;
final value = normalized > 0.9
? QueryTermModifier.EXACT
: normalized > 0.5
: normalized > 0.8
? QueryTermModifier.IMPORTANT
: normalized > 0.3
: normalized > 0.75
? QueryTermModifier.AND
: QueryTermModifier.NOT;
return MapEntry(k, value);
Expand Down
9 changes: 5 additions & 4 deletions test/free_text_search_test.dart
Expand Up @@ -33,14 +33,15 @@ void main() {
// final companyNames = await HashTagAnalyzer.getCompanyNames(service);
final index = await HashTagIndex.hydrate();
final documents = TestData.stockNews;
final documentZones = {'name': 20.0, 'description': 1.0};
final weightingStrategy =
WeightingStrategy(zoneWeights: documentZones, positionThreshold: 0);
// final documentZones = {'name': 20.0, 'description': 1.0};
// final weightingStrategy =
// WeightingStrategy(
// zoneWeights: documentZones, positionThreshold: null);
for (final e in documents.entries) {
final document = e.value;
final name = document['name'];
final results = await FreeTextSearch(index).document(document,
weightingStrategy: weightingStrategy,
weightingStrategy: HashTagQueryAnalyzer.kWeightingStrategy,
limit: 5,
nGramRange: NGramRange(1, 3),
tokenFilter: HashTagQueryAnalyzer.kFilterTokens,
Expand Down
119 changes: 116 additions & 3 deletions test/hashtag_analyzer.dart
Expand Up @@ -2,8 +2,9 @@
// BSD 3-Clause License
// All rights reserved

import 'package:free_text_search/free_text_search.dart';
import 'package:hive_text_index/hive_text_index.dart';
import 'package:text_indexing/text_indexing.dart';
// import 'package:text_indexing/text_indexing.dart';
import 'package:text_indexing/type_definitions.dart';
import 'package:text_indexing/extensions.dart';
import 'package:hive/hive.dart';
Expand All @@ -14,6 +15,10 @@ class HashTagQueryAnalyzer extends English {
@override
TermModifier get stemmer => (term) => term.toLowerCase();


/// Shared [WeightingStrategy] with equal zone weights.
///
/// Assigns a weight of `1.0` to both the `'name'` and `'description'` zones
/// and sets `positionThreshold` to `0`.
///
/// NOTE(review): declared without `final`, so this static can be reassigned
/// at runtime; confirm whether that is intended.
static WeightingStrategy kWeightingStrategy = WeightingStrategy(
zoneWeights: {'name': 1.0, 'description': 1.0}, positionThreshold: 0);

static Future<List<Token>> kFilterTokens(List<Token> tokens) async {
final retVal = <Token>[];
for (final token in tokens) {
Expand Down Expand Up @@ -762,7 +767,115 @@ class HashTagQueryAnalyzer extends English {
'hope',
'pressure',
'surge',
'storm'
'storm',
'half',
'securities',
'security',
'merchant',
'due',
'supply',
'focus',
'agree',
'tech',
'healthy',
'installed',
'advantage',
'mind',
'investors',
'wrapped',
'view',
'snap',
'artificial',
'intelligent',
'intelligence',
'ai',
'software',
'emerging',
'standard',
'cool',
'grow',
'americas',
'founders',
'point',
'fang',
'gas',
'natural',
'credit',
'federal',
'credits',
'general',
'parts',
'bank',
'giga',
'texas',
'line',
'&',
'mix',
'technical',
'industrial',
'automotive',
'far',
'near',
'deep',
'lighting',
'south',
'north',
'united',
'states',
'united states',
'complete',
'post',
'progress',
'progressive',
'exchange',
'white',
'house',
'black',
'aim',
'chain',
'forward',
'equity',
'home',
'trajectory',
'electric',
'reflect',
'research',
'interplay',
'morgan',
'ryan',
'metals',
'tailwind',
'net',
'booking',
'par',
'flow',
'prime',
'world',
'face',
'pace',
'american',
'hong',
'hong kong',
'sentiment',
'global',
'environment',
'environmental',
'waste',
'products',
'york',
'city',
'john',
'works',
'president',
'commercial',
'change',
'public',
'services',
'public company',
'cincinatti,',
'children',
'place',
'fair'
};
}

Expand All @@ -775,7 +888,7 @@ class HashTagAnalyzer extends English {
final symbol = getSymbolFromTicker(token.term);
if (symbol != null) {
retVal.add(Token(
symbol.toLowerCase(), token.n, token.termPosition, token.zone));
symbol.toLowerCase(), token.n, 0, token.zone));
} else {
retVal.add(token);
}
Expand Down

0 comments on commit 6d8b137

Please sign in to comment.