Permalink
Browse files

Add wildcard for attaching media

  • Loading branch information...
hollingsworthd committed Apr 9, 2015
1 parent c13625b commit a1d58fab051431c861cc4891d7e7ecbd031c1e1e
Showing with 25 additions and 13 deletions.
  1. +1 −0 .gitignore
  2. +4 −0 api/src/com/screenslicer/api/request/Query.java
  3. +20 −13 core/src/com/screenslicer/core/scrape/Scrape.java
View
@@ -4,6 +4,7 @@
**/fetch_local_cache
**/result_cache
**/download_cache*
**/media_cache*
screenslicer*.zip
ScreenSlicer*.zip
**/*.log.*
@@ -130,6 +130,10 @@
* Attach to the result any media that matches these HtmlNodes
*/
public HtmlNode[] media;
/**
* Whether to attach all media to the result, including media that
* does not match any of the {@code media} HtmlNodes.
*/
public boolean allMedia;
/**
* Credentials for authentication
*/
@@ -317,20 +317,22 @@ public DownloadedFiles(int thread) {
Map<String, String> encodedBytes = new LinkedHashMap<String, String>();
Map<String, String> mimeTypes = new LinkedHashMap<String, String>();
public SavedMedia(String body, HtmlNode[] patterns, int thread) {
if (!CommonUtil.isEmpty(patterns)) {
public SavedMedia(String body, HtmlNode[] patterns, boolean allMedia, int thread) {
if (allMedia || !CommonUtil.isEmpty(patterns)) {
Document doc = CommonUtil.parse(body, null, false);
List<Element> elementsTmp = new ArrayList<Element>(doc.getElementsByAttribute("src"));
List<Element> elements = new ArrayList<Element>();
for (Element element : elementsTmp) {
for (int i = 0; i < patterns.length; i++) {
if (NodeUtil.matches(patterns[i], element)) {
elements.add(element);
break;
if (!CommonUtil.isEmpty(patterns)) {
for (Element element : elementsTmp) {
for (int i = 0; i < patterns.length; i++) {
if (NodeUtil.matches(patterns[i], element)) {
elements.add(element);
break;
}
}
}
}
if (!elements.isEmpty()) {
if (allMedia || !elements.isEmpty()) {
try {
File dir = new File("./media_cache" + thread);
Collection<File> list = FileUtils.listFiles(dir, new String[] { "content" }, false);
@@ -349,9 +351,14 @@ public SavedMedia(String body, HtmlNode[] patterns, int thread) {
&& !reportedMimeType.toLowerCase().contains("octet") ? reportedMimeType
: (!CommonUtil.isEmpty(detectedMimeType) ? detectedMimeType : reportedMimeType);
List<String> sources = sources(url, elements);
for (String src : sources) {
encodedBytes.put(src, content);
mimeTypes.put(src, mimeType);
if (sources.isEmpty() && allMedia) {
encodedBytes.put(url, content);
mimeTypes.put(url, mimeType);
} else {
for (String src : sources) {
encodedBytes.put(src, content);
mimeTypes.put(src, mimeType);
}
}
} catch (Throwable t) {
Log.exception(t);
@@ -415,7 +422,7 @@ private static void fetch(Browser browser, Context context) throws ActionFailed
context.newResults.get(i).pageBinaryExtension = downloaded.extension;
context.newResults.get(i).pageBinaryFilename = downloaded.filename;
SavedMedia media = new SavedMedia(context.newResults.get(i).pageHtml,
context.query.media, context.threadNum);
context.query.media, context.query.allMedia, context.threadNum);
context.newResults.get(i).mediaBinaries.putAll(media.encodedBytes);
context.newResults.get(i).mediaMimeTypes.putAll(media.mimeTypes);
if (!CommonUtil.isEmpty(context.newResults.get(i).pageHtml)) {
@@ -854,7 +861,7 @@ private static void handlePage(Context context) throws ActionFailed, End {
private static boolean hasMedia(Query query) {
Query cur = query;
while (cur != null) {
if (!CommonUtil.isEmpty(cur.media)) {
if (!CommonUtil.isEmpty(cur.media) || cur.allMedia) {
return true;
}
cur = query.keywordQuery == null ? query.formQuery : query.keywordQuery;

0 comments on commit a1d58fa

Please sign in to comment.