LexicalTokenizerName.cs
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

// <auto-generated/>

#nullable disable

using System;
using System.ComponentModel;

namespace Azure.Search.Documents.Indexes.Models
{
    /// <summary> Defines the names of all tokenizers supported by the search engine. </summary>
    public readonly partial struct LexicalTokenizerName : IEquatable<LexicalTokenizerName>
    {
        private readonly string _value;

        /// <summary> Initializes a new instance of <see cref="LexicalTokenizerName"/>. </summary>
        /// <exception cref="ArgumentNullException"> <paramref name="value"/> is null. </exception>
        public LexicalTokenizerName(string value)
        {
            _value = value ?? throw new ArgumentNullException(nameof(value));
        }

        private const string ClassicValue = "classic";
        private const string EdgeNGramValue = "edgeNGram";
        private const string KeywordValue = "keyword_v2";
        private const string LetterValue = "letter";
        private const string LowercaseValue = "lowercase";
        private const string MicrosoftLanguageTokenizerValue = "microsoft_language_tokenizer";
        private const string MicrosoftLanguageStemmingTokenizerValue = "microsoft_language_stemming_tokenizer";
        private const string NGramValue = "nGram";
        private const string PathHierarchyValue = "path_hierarchy_v2";
        private const string PatternValue = "pattern";
        private const string StandardValue = "standard_v2";
        private const string UaxUrlEmailValue = "uax_url_email";
        private const string WhitespaceValue = "whitespace";

        /// <summary> Grammar-based tokenizer that is suitable for processing most European-language documents. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/ClassicTokenizer.html. </summary>
        public static LexicalTokenizerName Classic { get; } = new LexicalTokenizerName(ClassicValue);
        /// <summary> Tokenizes the input from an edge into n-grams of the given size(s). See https://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.html. </summary>
        public static LexicalTokenizerName EdgeNGram { get; } = new LexicalTokenizerName(EdgeNGramValue);
        /// <summary> Emits the entire input as a single token. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/KeywordTokenizer.html. </summary>
        public static LexicalTokenizerName Keyword { get; } = new LexicalTokenizerName(KeywordValue);
        /// <summary> Divides text at non-letters. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/LetterTokenizer.html. </summary>
        public static LexicalTokenizerName Letter { get; } = new LexicalTokenizerName(LetterValue);
        /// <summary> Divides text at non-letters and converts them to lower case. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/LowerCaseTokenizer.html. </summary>
        public static LexicalTokenizerName Lowercase { get; } = new LexicalTokenizerName(LowercaseValue);
        /// <summary> Divides text using language-specific rules. </summary>
        public static LexicalTokenizerName MicrosoftLanguageTokenizer { get; } = new LexicalTokenizerName(MicrosoftLanguageTokenizerValue);
        /// <summary> Divides text using language-specific rules and reduces words to their base forms. </summary>
        public static LexicalTokenizerName MicrosoftLanguageStemmingTokenizer { get; } = new LexicalTokenizerName(MicrosoftLanguageStemmingTokenizerValue);
        /// <summary> Tokenizes the input into n-grams of the given size(s). See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/NGramTokenizer.html. </summary>
        public static LexicalTokenizerName NGram { get; } = new LexicalTokenizerName(NGramValue);
        /// <summary> Tokenizer for path-like hierarchies. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/path/PathHierarchyTokenizer.html. </summary>
        public static LexicalTokenizerName PathHierarchy { get; } = new LexicalTokenizerName(PathHierarchyValue);
        /// <summary> Tokenizer that uses regex pattern matching to construct distinct tokens. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/pattern/PatternTokenizer.html. </summary>
        public static LexicalTokenizerName Pattern { get; } = new LexicalTokenizerName(PatternValue);
        /// <summary> Standard Lucene analyzer; Composed of the standard tokenizer, lowercase filter and stop filter. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/StandardTokenizer.html. </summary>
        public static LexicalTokenizerName Standard { get; } = new LexicalTokenizerName(StandardValue);
        /// <summary> Tokenizes urls and emails as one token. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.html. </summary>
        public static LexicalTokenizerName UaxUrlEmail { get; } = new LexicalTokenizerName(UaxUrlEmailValue);
        /// <summary> Divides text at whitespace. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/WhitespaceTokenizer.html. </summary>
        public static LexicalTokenizerName Whitespace { get; } = new LexicalTokenizerName(WhitespaceValue);

        /// <summary> Determines if two <see cref="LexicalTokenizerName"/> values are the same. </summary>
        public static bool operator ==(LexicalTokenizerName left, LexicalTokenizerName right) => left.Equals(right);
        /// <summary> Determines if two <see cref="LexicalTokenizerName"/> values are not the same. </summary>
        public static bool operator !=(LexicalTokenizerName left, LexicalTokenizerName right) => !left.Equals(right);
        /// <summary> Converts a string to a <see cref="LexicalTokenizerName"/>. </summary>
        public static implicit operator LexicalTokenizerName(string value) => new LexicalTokenizerName(value);

        /// <inheritdoc />
        [EditorBrowsable(EditorBrowsableState.Never)]
        public override bool Equals(object obj) => obj is LexicalTokenizerName other && Equals(other);
        /// <inheritdoc />
        public bool Equals(LexicalTokenizerName other) => string.Equals(_value, other._value, StringComparison.InvariantCultureIgnoreCase);

        /// <inheritdoc />
        [EditorBrowsable(EditorBrowsableState.Never)]
        public override int GetHashCode() => _value?.GetHashCode() ?? 0;
        /// <inheritdoc />
        public override string ToString() => _value;
    }
}
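
The struct above follows the "extensible enum" pattern: well-known tokenizer names are exposed as static properties, the implicit string conversion lets callers pass service values the SDK does not yet model, and equality compares the underlying strings case-insensitively. The following is a minimal sketch (not part of the generated file) illustrating that behavior; the commented-out CustomAnalyzer line at the end is an assumption about the wider Azure.Search.Documents.Indexes.Models API rather than something shown in this file.

    using System;
    using Azure.Search.Documents.Indexes.Models;

    public static class LexicalTokenizerNameDemo
    {
        public static void Main()
        {
            // Well-known values are static properties backed by the
            // service-side string constants ("standard_v2", "keyword_v2", ...).
            LexicalTokenizerName standard = LexicalTokenizerName.Standard;
            Console.WriteLine(standard);           // prints: standard_v2

            // The implicit conversion lets a newer, not-yet-modeled tokenizer
            // name flow through without an SDK update.
            LexicalTokenizerName future = "some_future_tokenizer";
            Console.WriteLine(future);             // prints: some_future_tokenizer

            // Equality uses StringComparison.InvariantCultureIgnoreCase, so a
            // differently cased string still matches the known value.
            LexicalTokenizerName shouty = "STANDARD_V2";
            Console.WriteLine(standard == shouty); // prints: True

            // Hypothetical usage when defining a custom analyzer; assumes a
            // CustomAnalyzer(name, tokenizerName) constructor exists in the
            // same models namespace.
            // var analyzer = new CustomAnalyzer("my_analyzer", LexicalTokenizerName.Keyword);
        }
    }

Note that because GetHashCode delegates to the string's default hash while Equals ignores case, two values that differ only in casing compare equal but may hash differently; using the canonical lower-case service names (as the static properties do) avoids that edge case.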