Skip to content

Commit e2a69e3

Browse files
committed
Getting ready for SST #2
1 parent cefe429 commit e2a69e3

File tree

2 files changed

+43
-20
lines changed

2 files changed

+43
-20
lines changed

exampledocs/names.xml

+16-19
Original file line numberDiff line numberDiff line change
@@ -14,23 +14,20 @@
1414
See the License for the specific language governing permissions and
1515
limitations under the License.
1616
-->
17+
<update>
18+
<delete><query>*:*</query></delete>
19+
<commit/>
20+
<add>
21+
<doc>
22+
<field name="id">1</field>
23+
<field name="name_text">William John James Dueber</field>
24+
<field name="name_text">Rufus Xavier Sarsparella</field>
25+
</doc>
26+
<doc>
27+
<field name="id">2</field>
28+
<field name="name_text">Mike Dueber</field>
29+
<field name="name_text">William Penn</field>
30+
</doc>
31+
</add>
1732

18-
<add>
19-
<doc>
20-
<field name="id">SP2514N</field>
21-
<field name="text">Just a test of the anchor</field>
22-
<field name="tf">Just a test of the anchor</field>
23-
<field name="tl">Just a test of the anchor</field>
24-
<field name="tr">Just a test of the anchor</field>
25-
<field name="numeric">ISBN13: 12345-2234X (behind stacks from 1990)</field>
26-
<field name="numeric">ISBN134455</field>
27-
<field name="csn">1234-5678, 11223344, 123456-89-9X, 111</field>
28-
<field name="name">William James John Dueber</field>
29-
<field name="name">Jesus H. Tapdancing Christ on a pogo stick with his sister nancy</field>
30-
<field name="othername">Rufus Xavier Sarsaparilla</field>
31-
<field name="othername">Jumping Jack Flash</field>
32-
33-
</doc>
34-
35-
</add>
36-
33+
</update>

solr/conf/schema.xml

+27-1
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,37 @@
9595
</analyzer>
9696
</fieldtype>
9797

98+
<!-- text - A standard text type, with icu tokenization and unicode normalization.
99+
- With the ICUFolding, we get:
100+
+ NFKC normalization (precomposing),
101+
+ Unicode case folding (i.e., lowercasing)
102+
+ search term folding (removing accents, etc).
103+
- Synonyms can be put in syn.txt (see sample synonyms.txt file in solr example)
104+
- Word delimiter splits on CaseChange and numbers (e.g., code4lib).
105+
- The CJK stuff produces bigrams for those languages
106+
- Remove Duplicates does what it says on the tin.
107+
-->
108+
<fieldtype name="text" class="solr.TextField" positionIncrementGap="1000">
109+
<analyzer>
110+
<tokenizer class="solr.ICUTokenizerFactory"/>
111+
<filter class="solr.ICUFoldingFilterFactory"/>
112+
<filter class="solr.SynonymFilterFactory" synonyms="syn.txt" ignoreCase="true" expand="false"/>
113+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
114+
<filter class="solr.CJKWidthFilterFactory"/>
115+
<filter class="solr.CJKBigramFilterFactory"/>
116+
<filter class="solr.TrimFilterFactory"/>
117+
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
118+
</analyzer>
119+
</fieldtype>
120+
121+
122+
98123
</types>
99124

100125
<fields>
101126
<field name="id" type="string" indexed="true" stored="true" />
102-
<field name="*_numeric" type="numericID" indexed="true" stored="true" multiValued="true"/>
127+
<dynamicField name="*_numeric" type="numericID" indexed="true" stored="true" multiValued="true"/>
128+
<dynamicField name="*_text" type="text" indexed="true" stored="true" multiValued="true"/>
103129
</fields>
104130

105131

0 commit comments

Comments
 (0)