Skip to content

Commit 61b48f0

Browse files
author
Emmanuel Hugonnet
committed
Updating Tika version to 1.2
Removing unused parsers and code
1 parent 6a9edd0 commit 61b48f0

File tree

8 files changed

+178
-161
lines changed

8 files changed

+178
-161
lines changed

lib-core/src/main/java/com/silverpeas/util/MetaData.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030

3131
import org.apache.tika.metadata.Metadata;
3232
import org.apache.tika.metadata.Property;
33+
import org.apache.tika.metadata.TikaCoreProperties;
3334

3435
import com.stratelia.webactiv.util.DateUtil;
3536

@@ -55,7 +56,7 @@ public List<String> getAvailablePropertyNames() {
5556
* @return String
5657
*/
5758
public String getTitle() {
58-
return metadata.get(Metadata.TITLE);
59+
return metadata.get(TikaCoreProperties.TITLE);
5960
}
6061

6162
/**
@@ -107,8 +108,8 @@ public int getSecurity() {
107108
*
108109
* @return String
109110
*/
110-
public String getKeywords() {
111-
return metadata.get(Metadata.KEYWORDS);
111+
public String[] getKeywords() {
112+
return metadata.getValues(Metadata.KEYWORDS);
112113
}
113114

114115
/**
@@ -141,9 +142,9 @@ public Date getLastSaveDateTime() {
141142
* Returns the creation date and time of an Office document
142143
*/
143144
public Date getCreationDate() {
144-
Date result = getDate(Metadata.CREATION_DATE);
145+
Date result = getDate(TikaCoreProperties.CREATED);
145146
if (result == null) {
146-
result = metadata.getDate(Metadata.DATE_CREATED);
147+
result = metadata.getDate(TikaCoreProperties.CREATED);
147148
}
148149
if (result == null) {
149150
result = metadata.getDate(Metadata.DATE);

lib-core/src/main/java/com/silverpeas/util/MetadataExtractor.java

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,9 @@
3030
import java.io.IOException;
3131
import java.io.InputStream;
3232
import java.io.Reader;
33-
import java.util.Map;
3433
import org.apache.commons.io.IOUtils;
3534
import org.apache.tika.Tika;
36-
import org.apache.tika.config.TikaConfig;
3735
import org.apache.tika.metadata.Metadata;
38-
import org.apache.tika.mime.MediaType;
39-
import org.apache.tika.parser.CompositeParser;
40-
import org.apache.tika.parser.ParseContext;
41-
import org.apache.tika.parser.Parser;
42-
import org.apache.tika.parser.microsoft.OfficeParser;
43-
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
44-
import org.apache.tika.parser.odf.OpenDocumentParser;
4536

4637
public class MetadataExtractor {
4738

@@ -69,24 +60,7 @@ public MetaData extractMetadata(String fileName) {
6960

7061
private MetaData getMetadata(InputStream inputStream) throws IOException {
7162
Metadata metadata = new Metadata();
72-
TikaConfig configuration = TikaConfig.getDefaultConfig();
73-
ParseContext context = new ParseContext();
74-
CompositeParser parser = ((CompositeParser) configuration.getParser());
75-
Parser openOfficeParser = new OpenDocumentParser();
76-
Map<MediaType, Parser> parsers = parser.getParsers(context);
77-
for (MediaType type : openOfficeParser.getSupportedTypes(context)) {
78-
parsers.put(type, openOfficeParser);
79-
}
80-
Parser officeParser = new OfficeParser();
81-
for (MediaType type : officeParser.getSupportedTypes(context)) {
82-
parsers.put(type, officeParser);
83-
}
84-
Parser ooxmlParser = new OOXMLParser();
85-
for (MediaType type : ooxmlParser.getSupportedTypes(context)) {
86-
parsers.put(type, ooxmlParser);
87-
}
88-
parser.setParsers(parsers);
89-
Tika tika = new Tika(configuration);
63+
Tika tika = new Tika();
9064
Reader reader = tika.parse(inputStream, metadata);
9165
reader.close();
9266
return new MetaData(metadata);

lib-core/src/main/java/org/silverpeas/search/indexEngine/model/RepositoryIndexer.java

Lines changed: 34 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,22 @@
11
/**
22
* Copyright (C) 2000 - 2012 Silverpeas
33
*
4-
* This program is free software: you can redistribute it and/or modify
5-
* it under the terms of the GNU Affero General Public License as
6-
* published by the Free Software Foundation, either version 3 of the
7-
* License, or (at your option) any later version.
4+
* This program is free software: you can redistribute it and/or modify it under the terms of the
5+
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
6+
* of the License, or (at your option) any later version.
87
*
9-
* As a special exception to the terms and conditions of version 3.0 of
10-
* the GPL, you may redistribute this Program in connection with Free/Libre
11-
* Open Source Software ("FLOSS") applications as described in Silverpeas's
12-
* FLOSS exception. You should have received a copy of the text describing
13-
* the FLOSS exception, and it is also available here:
8+
* As a special exception to the terms and conditions of version 3.0 of the GPL, you may
9+
* redistribute this Program in connection with Free/Libre Open Source Software ("FLOSS")
10+
* applications as described in Silverpeas's FLOSS exception. You should have received a copy of the
11+
* text describing the FLOSS exception, and it is also available here:
1412
* "http://www.silverpeas.org/docs/core/legal/floss_exception.html"
1513
*
16-
* This program is distributed in the hope that it will be useful,
17-
* but WITHOUT ANY WARRANTY; without even the implied warranty of
18-
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19-
* GNU Affero General Public License for more details.
14+
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
15+
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16+
* Affero General Public License for more details.
2017
*
21-
* You should have received a copy of the GNU Affero General Public License
22-
* along with this program. If not, see <http://www.gnu.org/licenses/>.
18+
* You should have received a copy of the GNU Affero General Public License along with this program.
19+
* If not, see <http://www.gnu.org/licenses/>.
2320
*/
2421
package org.silverpeas.search.indexEngine.model;
2522

@@ -28,62 +25,47 @@
2825
import java.util.Collections;
2926
import java.util.List;
3027

28+
import com.silverpeas.util.FileUtil;
3129
import com.stratelia.silverpeas.silvertrace.SilverTrace;
32-
import com.stratelia.webactiv.util.FileRepositoryManager;
33-
import com.stratelia.webactiv.util.ResourceLocator;
3430

3531
/**
3632
* A RepositoryIndexer allows indexing the files in a whole repository, except the directories
3733
*/
3834
public class RepositoryIndexer {
3935

36+
public static final String ADD_ACTION = "add";
37+
public static final String REMOVE_ACTION = "remove";
4038
private String spaceId = null;
4139
private String componentId = null;
4240
private int count = 0;
4341
private char separator;
44-
ResourceLocator resource = null;
45-
IndexManager indexManager = null;
42+
private IndexManager indexManager = new IndexManager();
4643

4744
public RepositoryIndexer(String spaceId, String componentId) {
4845
SilverTrace.debug("indexEngine", "RepositoryIndexer.RepositoryIndexer()",
4946
"root.MSG_GEN_PARAM_VALUE", "spaceId=" + spaceId + " ComponentId=" + componentId);
50-
setSpaceId(spaceId);
51-
setComponentId(componentId);
52-
resource = new ResourceLocator(
53-
"com.stratelia.webactiv.util.attachment.mime_types", "fr");
54-
indexManager = new IndexManager();
55-
}
56-
57-
public final void setSpaceId(String spaceId) {
5847
this.spaceId = spaceId;
48+
this.componentId = componentId;
5949
}
6050

6151
public String getSpaceId() {
6252
return spaceId;
6353
}
6454

65-
public final void setComponentId(String componentId) {
66-
this.componentId = componentId;
67-
}
68-
6955
public String getComponentId() {
7056
return componentId;
7157
}
7258

73-
public void pathIndexer(String path, String creationDate, String creatorId,
74-
String action) {
75-
SilverTrace.debug("indexEngine", "RepositoryIndexer.pathIndexer()",
76-
"root.MSG_GEN_ENTER_METHOD", "path=" + path);
77-
59+
public void pathIndexer(String path, String creationDate, String creatorId, String action) {
60+
SilverTrace.debug("indexEngine", "RepositoryIndexer.pathIndexer()", "root.MSG_GEN_ENTER_METHOD",
61+
"path=" + path);
7862
separator = path.charAt(0);
79-
SilverTrace.debug("indexEngine", "RepositoryIndexer.pathIndexer()",
80-
"root.MSG_GEN_PARAM_VALUE", "separator = " + separator);
81-
63+
SilverTrace.debug("indexEngine", "RepositoryIndexer.pathIndexer()", "root.MSG_GEN_PARAM_VALUE",
64+
"separator = " + separator);
8265
File dir = new File(path);
8366
if (dir.isDirectory()) {
8467
// index directory
8568
indexDirectory(action, creationDate, creatorId, dir);
86-
8769
// index directory's content
8870
processFileList(dir, creationDate, creatorId, action);
8971
}
@@ -94,6 +76,7 @@ public void pathIndexer(String path, String creationDate, String creatorId,
9476

9577
/**
9678
* Recursive function which walks through the directories. Each file found is indexed.
79+
*
9780
* @param fileList an array which contains directories and files
9881
* @param path the current path
9982
* @param currentDirectoryName the current directory name
@@ -125,35 +108,33 @@ private void processFileList(File dir, String creationDate, String creatorId, St
125108
}
126109
}
127110

128-
private void indexDirectory(String action, String creationDate,
129-
String creatorId, File directory) {
130-
if ("add".equals(action)) {
111+
private void indexDirectory(String action, String creationDate, String creatorId, File directory) {
112+
if (ADD_ACTION.equals(action)) {
131113
// index the directory
132-
FullIndexEntry fullIndexEntry = new FullIndexEntry(getComponentId(), "LinkedDir",
133-
directory.getPath());
114+
FullIndexEntry fullIndexEntry = new FullIndexEntry(getComponentId(), "LinkedDir",
115+
directory.getPath());
134116
fullIndexEntry.setTitle(directory.getName());
135117
fullIndexEntry.setCreationDate(creationDate);
136118
fullIndexEntry.setCreationUser(creatorId);
137119
indexManager.addIndexEntry(fullIndexEntry);
138120
count++;
139-
} else if ("remove".equals(action)) {
121+
} else if (REMOVE_ACTION.equals(action)) {
140122
IndexEntryPK indexEntry = new IndexEntryPK(getComponentId(), "LinkedDir", directory.getPath());
141123
indexManager.removeIndexEntry(indexEntry);
142124
}
143125
}
144126

145-
public void indexFile(String action, String creationDate, String creatorId,
146-
File file) {
127+
public void indexFile(String action, String creationDate, String creatorId, File file) {
147128
indexFile(action, creationDate, creatorId, file, true);
148129
}
149130

150-
public void indexFile(String action, String creationDate, String creatorId,
151-
File file, boolean closeIndex) {
131+
public void indexFile(String action, String creationDate, String creatorId, File file,
132+
boolean closeIndex) {
152133
// String path = currentPath + separator + fileName;
153134

154135
String filePath = file.getPath();
155136

156-
if ("add".equals(action)) {
137+
if (ADD_ACTION.equals(action)) {
157138
String fileName = file.getName();
158139

159140
// Add file in index
@@ -172,13 +153,13 @@ public void indexFile(String action, String creationDate, String creatorId,
172153
fullIndexEntry.setCreationUser(creatorId);
173154

174155
if (haveGotExtension && !fileName.startsWith("~")) {
175-
String format = getMimeType(fileName);
156+
String format = FileUtil.getMimeType(fileName);
176157
String lang = "fr";
177158
fullIndexEntry.addFileContent(filePath, null, format, lang);
178159
}
179160
indexManager.addIndexEntry(fullIndexEntry);
180161
count++;
181-
} else if ("remove".equals(action)) { // Remove file from index
162+
} else if (REMOVE_ACTION.equals(action)) { // Remove file from index
182163
IndexEntryPK indexEntry = new IndexEntryPK(getComponentId(), "LinkedFile", filePath);
183164
indexManager.removeIndexEntry(indexEntry);
184165
}
@@ -187,13 +168,4 @@ public void indexFile(String action, String creationDate, String creatorId,
187168
indexManager.optimize();
188169
}
189170
}
190-
191-
private String getMimeType(String fileName) {
192-
String mimeType = null;
193-
String fileExtension = FileRepositoryManager.getFileExtension(fileName);
194-
if (resource != null && fileExtension != null) {
195-
mimeType = resource.getString(fileExtension.toLowerCase());
196-
}
197-
return mimeType;
198-
}
199171
}

lib-core/src/main/java/org/silverpeas/search/indexEngine/parser/tika/TikaParser.java

Lines changed: 1 addition & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,7 @@
2626
import java.io.IOException;
2727
import java.io.Reader;
2828
import java.io.StringReader;
29-
import java.util.Map;
3029
import org.apache.tika.Tika;
31-
import org.apache.tika.config.TikaConfig;
32-
import org.apache.tika.mime.MediaType;
33-
import org.apache.tika.parser.CompositeParser;
34-
import org.apache.tika.parser.ParseContext;
35-
import org.apache.tika.parser.microsoft.OfficeParser;
36-
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
37-
import org.apache.tika.parser.odf.OpenDocumentParser;
3830
import org.silverpeas.search.indexEngine.parser.Parser;
3931

4032
/**
@@ -46,24 +38,7 @@ public class TikaParser implements Parser {
4638
private final Tika tika = initTika();
4739

4840
private Tika initTika() {
49-
TikaConfig configuration = TikaConfig.getDefaultConfig();
50-
ParseContext context = new ParseContext();
51-
CompositeParser parser = ((CompositeParser) configuration.getParser());
52-
org.apache.tika.parser.Parser openOfficeParser = new OpenDocumentParser();
53-
Map<MediaType, org.apache.tika.parser.Parser> parsers = parser.getParsers(context);
54-
for (MediaType type : openOfficeParser.getSupportedTypes(context)) {
55-
parsers.put(type, openOfficeParser);
56-
}
57-
org.apache.tika.parser.Parser officeParser = new OfficeParser();
58-
for (MediaType type : officeParser.getSupportedTypes(context)) {
59-
parsers.put(type, officeParser);
60-
}
61-
org.apache.tika.parser.Parser ooxmlParser = new OOXMLParser();
62-
for (MediaType type : ooxmlParser.getSupportedTypes(context)) {
63-
parsers.put(type, ooxmlParser);
64-
}
65-
parser.setParsers(parsers);
66-
return new Tika(configuration);
41+
return new Tika();
6742
}
6843

6944
@Override

0 commit comments

Comments
 (0)