|
95 | 95 | </analyzer>
|
96 | 96 | </fieldtype>
|
97 | 97 |
|
| 98 | + <!-- text - A standard text type, with ICU tokenization and Unicode normalization. |
| 99 | + - With the ICUFolding, we get: |
| 100 | + + NFKC normalization (precomposing), |
| 101 | + + Unicode case folding (i.e., lowercasing) |
| 102 | + + search term folding (removing accents, etc). |
| 103 | + - Synonyms can be put in syn.txt (see sample synonyms.txt file in solr example) |
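| 104 | + - Word delimiter splits on letter/number transitions (e.g., code4lib); case-change splitting cannot take effect in this chain because ICUFolding has already lowercased the tokens before the word delimiter runs. |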
| 105 | + - The CJK stuff produces bigrams for those languages |
| 106 | + - Remove Duplicates does what it says on the tin. |
| 107 | + --> |
| 108 | + <fieldtype name="text" class="solr.TextField" positionIncrementGap="1000"> |
| 109 | + <analyzer> |
| 110 | + <tokenizer class="solr.ICUTokenizerFactory"/> |
| 111 | + <filter class="solr.ICUFoldingFilterFactory"/> |
| 112 | + <filter class="solr.SynonymFilterFactory" synonyms="syn.txt" ignoreCase="true" expand="false"/> |
| 113 | + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/> |
| 114 | + <filter class="solr.CJKWidthFilterFactory"/> |
| 115 | + <filter class="solr.CJKBigramFilterFactory"/> |
| 116 | + <filter class="solr.TrimFilterFactory"/> |
| 117 | + <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> |
| 118 | + </analyzer> |
| 119 | + </fieldtype> |
| 120 | + |
| 121 | + |
| 122 | + |
98 | 123 | </types>
|
99 | 124 |
|
100 | 125 | <fields>
|
101 | 126 | <field name="id" type="string" indexed="true" stored="true" />
|
102 |
| - <field name="*_numeric" type="numericID" indexed="true" stored="true" multiValued="true"/> |
| 127 | + <dynamicField name="*_numeric" type="numericID" indexed="true" stored="true" multiValued="true"/> |
| 128 | + <dynamicField name="*_text" type="text" indexed="true" stored="true" multiValued="true"/> |
103 | 129 | </fields>
|
104 | 130 |
|
105 | 131 |
|
|
0 commit comments