|
103 | 103 | - Synonyms can be put in syn.txt (see sample synonyms.txt file in solr example)
|
104 | 104 | - The word delimiter filter splits tokens on case changes and on letter/number boundaries (e.g., code4lib becomes code, 4, lib).
|
105 | 105 | - The CJK filters (CJKWidthFilterFactory, CJKBigramFilterFactory) normalize character width and produce bigrams for Chinese, Japanese, and Korean text.
|
106 |
| - - Remove Duplicates does what it says on the tin. |
107 | 106 | -->
|
108 | 107 | <fieldtype name="text" class="solr.TextField" positionIncrementGap="1000">
|
109 | 108 | <analyzer>
|
|
113 | 112 | <!-- <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/> -->
|
114 | 113 | <filter class="solr.CJKWidthFilterFactory"/>
|
115 | 114 | <filter class="solr.CJKBigramFilterFactory"/>
|
116 |
| - <filter class="solr.TrimFilterFactory"/> |
117 |
| - <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> |
118 | 115 | </analyzer>
|
119 | 116 | </fieldtype>
|
120 | 117 |
|
|
124 | 121 |
|
125 | 122 | <fieldtype name="text_lr" class="solr.TextField" positionIncrementGap="1000">
|
126 | 123 | <analyzer>
|
| 124 | + <charFilter class="solr.PatternReplaceCharFilterFactory" |
| 125 | + pattern="^(.*)$" replacement="AAAA $1 ZZZZ" /> |
127 | 126 | <tokenizer class="solr.ICUTokenizerFactory"/>
|
128 |
| - <charFilter class="solr.PatternReplaceCharFilterFactory" |
129 |
| - pattern="^(.*)$" replacement="AAAA $1 ZZZZ" /> |
130 | 127 | <filter class="solr.ICUFoldingFilterFactory"/>
|
131 | 128 | <filter class="solr.SynonymFilterFactory" synonyms="syn.txt" ignoreCase="true" expand="false"/>
|
132 | 129 | <filter class="solr.CJKWidthFilterFactory"/>
|
133 | 130 | <filter class="solr.CJKBigramFilterFactory"/>
|
134 |
| - <filter class="solr.TrimFilterFactory"/> |
135 |
| - <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> |
136 | 131 | </analyzer>
|
137 | 132 | </fieldtype>
|
138 | 133 |
|
139 | 134 | <fieldtype name="text_l" class="solr.TextField" positionIncrementGap="1000">
|
140 | 135 | <analyzer>
|
| 136 | + <charFilter class="solr.PatternReplaceCharFilterFactory" |
| 137 | + pattern="^(.*)$" replacement="AAAA $1" /> |
141 | 138 | <tokenizer class="solr.ICUTokenizerFactory"/>
|
142 | 139 | <filter class="solr.ICUFoldingFilterFactory"/>
|
143 | 140 | <filter class="solr.SynonymFilterFactory" synonyms="syn.txt" ignoreCase="true" expand="false"/>
|
144 | 141 | <filter class="solr.CJKWidthFilterFactory"/>
|
145 | 142 | <filter class="solr.CJKBigramFilterFactory"/>
|
146 |
| - <filter class="solr.TrimFilterFactory"/> |
147 |
| - <filter class="solr.PatternReplaceFilterFactory" |
148 |
| - pattern="\p{Z}+" replacement=" " |
149 |
| - /> |
150 |
| - <charFilter class="solr.PatternReplaceCharFilterFactory" |
151 |
| - pattern="^(.*)$" replacement="AAAA $1" /> |
152 |
| - <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> |
153 | 143 | </analyzer>
|
154 | 144 | </fieldtype>
|
155 | 145 |
|
|
0 commit comments