Permalink
Browse files

Initial commit

  • Loading branch information...
0 parents commit 8f595673f34e120a8f1b4b4dd619f7592e2914ea @Acerbic committed Jul 23, 2011
20 .classpath
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="src" path="src"/>
+ <classpathentry kind="lib" path="lib/entagged-audioformats-0.15.jar"/>
+ <classpathentry kind="lib" path="lib/jaxen-1.1.3/jaxen-1.1.3.jar">
+ <attributes>
+ <attribute name="javadoc_location" value="file:/D:/Gleb/JavaWorkspace_Eclipse/Dloader/lib/jaxen-1.1.3/docs/apidocs/"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="lib" path="lib/tagsoup-1.2.jar"/>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
+ <classpathentry kind="lib" path="lib/jdom/lib/xerces.jar"/>
+ <classpathentry kind="lib" path="lib/jdom/lib/xml-apis.jar"/>
+ <classpathentry kind="lib" path="lib/jdom/build/jdom.jar">
+ <attributes>
+ <attribute name="javadoc_location" value="file:/D:/Gleb/JavaWorkspace_Eclipse/Dloader/lib/jdom/build/apidocs/"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="output" path="build"/>
+</classpath>
60 .project
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>Dloader</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ </natures>
+ <filteredResources>
+ <filter>
+ <id>1311424183233</id>
+ <name></name>
+ <type>6</type>
+ <matcher>
+ <id>org.eclipse.ui.ide.multiFilter</id>
+ <arguments>1.0-name-matches-false-false-*.log</arguments>
+ </matcher>
+ </filter>
+ <filter>
+ <id>1311424183264</id>
+ <name></name>
+ <type>6</type>
+ <matcher>
+ <id>org.eclipse.ui.ide.multiFilter</id>
+ <arguments>1.0-name-matches-false-false-*cache.xml</arguments>
+ </matcher>
+ </filter>
+ <filter>
+ <id>1311424183264</id>
+ <name></name>
+ <type>22</type>
+ <matcher>
+ <id>org.eclipse.ui.ide.andFilterMatcher</id>
+ <arguments>
+ <matcher>
+ <id>org.eclipse.ui.ide.multiFilter</id>
+ <arguments>1.0-projectRelativePath-matches-false-false-lib*</arguments>
+ </matcher>
+ <matcher>
+ <id>org.eclipse.ui.ide.notFilterMatcher</id>
+ <arguments>
+ <matcher>
+ <id>org.eclipse.ui.ide.multiFilter</id>
+ <arguments>1.0-name-matches-false-false-*.jar</arguments>
+ </matcher>
+ </arguments>
+ </matcher>
+ </arguments>
+ </matcher>
+ </filter>
+ </filteredResources>
+</projectDescription>
12 .settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,12 @@
+#Thu Apr 21 02:48:51 MSD 2011
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.6
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.source=1.6
4 .settings/org.eclipse.jdt.ui.prefs
@@ -0,0 +1,4 @@
+#Mon Apr 25 21:28:52 MSD 2011
+eclipse.preferences.version=1
+org.eclipse.jdt.ui.javadoc=false
+org.eclipse.jdt.ui.text.custom_code_templates=<?xml version\="1.0" encoding\="UTF-8" standalone\="no"?><templates><template autoinsert\="true" context\="gettercomment_context" deleted\="false" description\="Comment for getter method" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.gettercomment" name\="gettercomment">/**\r\n * @return the ${bare_field_name}\r\n */</template><template autoinsert\="true" context\="settercomment_context" deleted\="false" description\="Comment for setter method" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.settercomment" name\="settercomment">/**\r\n * @param ${param} the ${bare_field_name} to set\r\n */</template><template autoinsert\="true" context\="constructorcomment_context" deleted\="false" description\="Comment for created constructors" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.constructorcomment" name\="constructorcomment">/**\r\n * ${tags}\r\n */</template><template autoinsert\="true" context\="filecomment_context" deleted\="false" description\="Comment for created Java files" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.filecomment" name\="filecomment">/**\r\n * \r\n */</template><template autoinsert\="false" context\="typecomment_context" deleted\="false" description\="Comment for created types" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.typecomment" name\="typecomment">/**\r\n * @author A.Cerbic\r\n * ${tags}\r\n */</template><template autoinsert\="true" context\="fieldcomment_context" deleted\="false" description\="Comment for fields" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.fieldcomment" name\="fieldcomment">/**\r\n * \r\n */</template><template autoinsert\="true" context\="methodcomment_context" deleted\="false" description\="Comment for non-overriding methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.methodcomment" name\="methodcomment">/**\r\n * ${tags}\r\n */</template><template autoinsert\="true" 
context\="overridecomment_context" deleted\="false" description\="Comment for overriding methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.overridecomment" name\="overridecomment">/* (non-Javadoc)\r\n * ${see_to_overridden}\r\n */</template><template autoinsert\="true" context\="delegatecomment_context" deleted\="false" description\="Comment for delegate methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.delegatecomment" name\="delegatecomment">/**\r\n * ${tags}\r\n * ${see_to_target}\r\n */</template><template autoinsert\="true" context\="newtype_context" deleted\="false" description\="Newly created files" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.newtype" name\="newtype">${filecomment}\r\n${package_declaration}\r\n\r\n${typecomment}\r\n${type_declaration}</template><template autoinsert\="true" context\="classbody_context" deleted\="false" description\="Code in new class type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.classbody" name\="classbody">\r\n</template><template autoinsert\="true" context\="interfacebody_context" deleted\="false" description\="Code in new interface type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.interfacebody" name\="interfacebody">\r\n</template><template autoinsert\="true" context\="enumbody_context" deleted\="false" description\="Code in new enum type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.enumbody" name\="enumbody">\r\n</template><template autoinsert\="true" context\="annotationbody_context" deleted\="false" description\="Code in new annotation type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.annotationbody" name\="annotationbody">\r\n</template><template autoinsert\="true" context\="catchblock_context" deleted\="false" description\="Code in new catch blocks" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.catchblock" name\="catchblock">// ${todo} Auto-generated catch 
block\r\n${exception_var}.printStackTrace();</template><template autoinsert\="true" context\="methodbody_context" deleted\="false" description\="Code in created method stubs" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.methodbody" name\="methodbody">// ${todo} Auto-generated method stub\r\n${body_statement}</template><template autoinsert\="true" context\="constructorbody_context" deleted\="false" description\="Code in created constructor stubs" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.constructorbody" name\="constructorbody">${body_statement}\r\n// ${todo} Auto-generated constructor stub</template><template autoinsert\="true" context\="getterbody_context" deleted\="false" description\="Code in created getters" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.getterbody" name\="getterbody">return ${field};</template><template autoinsert\="true" context\="setterbody_context" deleted\="false" description\="Code in created setters" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.setterbody" name\="setterbody">${field} \= ${param};</template></templates>
3 .settings/org.eclipse.ltk.core.refactoring.prefs
@@ -0,0 +1,3 @@
+#Sat Apr 23 16:38:40 MSD 2011
+eclipse.preferences.version=1
+org.eclipse.ltk.core.refactoring.enable.project.refactoring.history=false
1 Bandcamp_Dloader.cmd
@@ -0,0 +1 @@
+@java -cp "bin/jdom.jar;bin;bin/entagged-audioformats-0.15.jar" Main %*
BIN lib/entagged-audioformats-0.15.jar
Binary file not shown.
BIN lib/jaxen-1.1.3/jaxen-1.1.3.jar
Binary file not shown.
BIN lib/jdom/build/jdom.jar
Binary file not shown.
BIN lib/jdom/lib/ant.jar
Binary file not shown.
BIN lib/jdom/lib/jaxen.jar
Binary file not shown.
BIN lib/jdom/lib/saxpath.jar
Binary file not shown.
BIN lib/jdom/lib/xalan.jar
Binary file not shown.
BIN lib/jdom/lib/xerces.jar
Binary file not shown.
BIN lib/jdom/lib/xml-apis.jar
Binary file not shown.
BIN lib/tagsoup-1.2.jar
Binary file not shown.
110 src/Album.java
@@ -0,0 +1,110 @@
+import java.io.File;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.List;
+import java.util.logging.Level;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jdom.Document;
+import org.jdom.Element;
+
+public class Album extends PageParser {
+
+ public URL coverUrl;
+ public String moreInfo;
+ private int trackCounter;
+
+ public Album(URL url) throws IllegalArgumentException {super(url);}
+
+ public Album(String s) throws IllegalArgumentException {super(s);}
+
+ public Album() {super();}
+
+ @Override
+ public void saveResult(File saveTo) throws IOException {
+ File f = new File(saveTo, getFSSafeName(title));
+ if (!f.exists())
+ if (!f.mkdirs()) {
+ logger.severe(String.format("Directory creation failed (%s)%n",
+ f.getAbsolutePath()));
+ return;
+ }
+ logger.info( String.format("(album) \"%s\": %s%n", title, url.toString()));
+ WebDownloader.fetchWebFile(coverUrl, new File(f, "cover.jpg"));
+ if (childPages != null) {
+ logger.info( String.format("\t Downloading tracks (%d):%n",
+ childPages.length));
+ for (int i = 0; i < childPages.length; i++)
+ if (childPages[i] != null) {
+ logger.info( String.format("\t\t%d. ", i + 1));
+ childPages[i].saveResult(f);
+ }
+ }
+ }
+
+ @Override
+ protected void readCacheSelf(Element e) throws ProblemsReadingDocumentException{
+ try {
+ coverUrl = resolveLink(e.getAttributeValue("coverUrl"));
+ } catch (MalformedURLException e1) {
+ throw new ProblemsReadingDocumentException(e1);
+ }
+ moreInfo = e.getAttributeValue("moreInfo");
+ }
+
+ @Override
+ protected Element getSpecificDataXML() {
+ Element e = new Element("Album");
+ if (coverUrl != null) e.setAttribute("coverUrl", coverUrl.toString());
+ if (moreInfo != null) e.setAttribute("moreInfo", moreInfo);
+ return e;
+ }
+
+ @Override
+ protected PageParser parseChild(Element element) throws ProblemsReadingDocumentException {
+ try {
+ trackCounter++; // that includes counting for failed parsing
+ String s = element.getAttributeValue("href");
+ Track t = new Track(s);
+ t.title = element.getText();
+ t.setProperty("track", String.valueOf(trackCounter));
+ return t;
+ } catch (IllegalArgumentException e) {
+ throw new ProblemsReadingDocumentException(e);
+ } catch (NullPointerException e) {
+ throw new ProblemsReadingDocumentException(e);
+ }
+ }
+
+ @Override
+ protected void parseSelf(Document doc) throws ProblemsReadingDocumentException {
+ @SuppressWarnings("unchecked")
+ List<Element> imgList = (List<Element>) queryXPathList("//pre:div[@id='tralbumArt']/pre:img", doc);
+ if (imgList.size() > 0) {
+ try {
+ coverUrl = resolveLink((imgList.get(0)).getAttributeValue("src"));
+ } catch (MalformedURLException e) {
+ logger.log(Level.WARNING, "--no album art--", e);
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ List<Element> scriptList = (List<Element>) queryXPathList("//pre:script", doc);
+ for (int i = 0; i<scriptList.size(); i++) {
+ Pattern x = Pattern.compile(".*album_title : \"([^\"]*)\".*", Pattern.DOTALL);
+ Matcher m = x.matcher(scriptList.get(i).getText());
+ if (m.matches()) {
+ title = m.group(1);
+ break;
+ }
+ }
+ }
+
+ @Override
+ protected String getChildNodesXPath() {
+ return "//pre:table[@id='track_table']//pre:td/pre:div[@class='title']//pre:a";
+ }
+
+}
104 src/Discography.java
@@ -0,0 +1,104 @@
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.util.List;
+
+import org.jdom.Document;
+import org.jdom.Element;
+
+public class Discography extends PageParser {
+
+ private enum DiscographyListVariant { SIDEBAR, CENTRAL_INDEX };
+ /**
+ * detected on parseSelf() call and dictates
+ * what getChildNodesXPath() returns.
+ */
+ private DiscographyListVariant variant = DiscographyListVariant.SIDEBAR;
+
+ public Discography(URL url) throws IllegalArgumentException {super(url);}
+
+ public Discography(String s) throws IllegalArgumentException {super(s);}
+
+ public Discography() {super();}
+
+ @Override
+ protected void parseSelf(Document doc) throws ProblemsReadingDocumentException {
+ List<?> result = queryXPathList("//pre:title", doc);
+ if ((result != null) && (result.size()>0))
+ title = ((Element) result.get(0)).getText();
+ else
+ title = "Unknownband";
+
+ // now detect type of Discography
+ result = queryXPathList("//pre:ul[@title='Discography']", doc);
+ if (result.size()>0) {
+ variant = DiscographyListVariant.SIDEBAR;
+ return;
+ }
+
+ result = queryXPathList("//pre:div[@id='indexpage']", doc);
+ if (result.size()>0) {
+ variant = DiscographyListVariant.CENTRAL_INDEX;
+ }
+ }
+
+ @Override
+ public void saveResult(File saveTo) throws IOException {
+ File f = new File(saveTo, getFSSafeName(title));
+ if (!f.exists())
+ if (!f.mkdirs()) {
+ logger.severe(String.format("Directory creation failed (%s)%n",
+ f.getAbsolutePath()));
+ return;
+ }
+ logger.info( String.format("Discography: %s%n", title));
+ if (childPages != null) {
+ logger.info( String.format("Saving albums (%d):%n",
+ childPages.length));
+ for (int i = 0; i < childPages.length; i++) {
+ logger.info( String.format("\t%d. ", i + 1));
+ if (childPages[i] != null)
+ childPages[i].saveResult(f);
+ else
+ logger.info( "--- don't exist! --- \n");
+ }
+ }
+
+ }
+
+ @Override
+ protected void readCacheSelf(Element e) {
+ }
+
+ @Override
+ protected Element getSpecificDataXML() {
+ return new Element("Discography");
+ }
+
+ // field polymorphism
+ @Override
+ protected String getChildNodesXPath() {
+ switch (variant) {
+ case SIDEBAR:
+ return "//pre:ul[@title='Discography']//pre:div[@class='trackTitle']/pre:a";
+ case CENTRAL_INDEX:
+ return "//pre:div[@id='indexpage']//pre:h1/pre:a";
+ }
+ return null;
+ }
+
+ @Override
+ protected PageParser parseChild(Element element) throws ProblemsReadingDocumentException {
+ try {
+ String s = element.getAttributeValue("href");
+ Album c = new Album(s);
+ c.title = element.getText();
+ return c;
+ } catch (NullPointerException e) {
+ throw new ProblemsReadingDocumentException (e);
+ } catch (IllegalArgumentException e) {
+ throw new ProblemsReadingDocumentException (e);
+ }
+ }
+
+}
147 src/Main.java
@@ -0,0 +1,147 @@
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.logging.*;
+
+public class Main {
+ public static final String newline = System.getProperty ( "line.separator" );
+
+ /* Default values */
+ public static String baseURL = "http://homestuck.bandcamp.com";
+ public static String xmlFileName = "pages_scan_cache.xml";
+ public static String logFile = "download.log";
+ public static boolean allowTagging = true;
+ public static boolean allowFromCache = true;
+ public static boolean logToCon = true;
+ public static boolean logToFile = false;
+ // user current directory
+ public static File saveTo = new File((new File("")).getAbsolutePath());
+
+ public static Logger logger;
+ public static XMLCache cache;
+
+ private static void parseCommandLine(String[] args) {
+ for (String s : args) {
+ s = s.trim();
+ if (s.length() < 2)continue;
+ if ((s.charAt(0) != '-') && (s.charAt(0) != '/')) continue;
+ switch (s.substring(1, 2).toLowerCase().charAt(0)) {
+ case '?':
+ System.out
+ .print("homestuck.bandcamp.com downloader by A.Cerbic, 04.2011.\n"
+ + "\tuse these command-line parameters:\n"
+ + "\t\t-?\t to call for this help, duh!\n"
+ + "\t\t-silent\t to run this app ninja-style (silent). \n"
+ + "\t\t\t Beware, 500+ Mb download will leave you against blinking cursor for a while.\n"
+ + "\t\t-log\t to let the program take some notes into '"+logFile+"' file\n"
+ + "\t\t-d<URL>\t to start downloading from some URL other than default (homestuck.bandcamp.com)\n"
+ + "\t\t\t it _might_ even work, if you have some luck (how much luck? - ALL OF IT). \n"
+ + "\t\t\t No angle brackets in actual parameter, btw.\n"
+ + "\t\t-t<Directory>\t to define where to drop shitload if MP3s you gonna obtain,\n"
+ + "\t\t\t default is right where the program is.\n"
+ + "\n"
+ + "HINT: if the program discovers that a file for given track already exists, \n"
+ + "\tthat track download will be skipped, so if previous download session was interrupted you can\n"
+ + "\tjust run the program over and it will download only new files. \n"
+ + "\t\t-noretag\t will suppress this program's tagging efforts, keeping the original ID3 tags if any \n"
+ + "\t\t\t (without it all files will be retagged, both new and 'skipped')\n"
+ + "\n"
+ + "HINT: the program tries to cache relevant data from all the pages it goes through,\n"
+ + "\tso if it finds a web page that was parsed before and cached, the page won't be downloaded.\n"
+ + "\tIf you want all pages to be re-downloaded anew - delete '"+xmlFileName+"' or use next key:\n"
+ + "\t\t-fullscan\t forces all pages to be downloaded, cache gets updated.\n"
+ );
+ System.exit(0);
+ case 's': logToCon = false; break;
+ case 'l': logToFile = true; break;
+ case 'd': baseURL = s.substring(2); break;
+ case 't':
+ saveTo = new File(s.substring(2));
+ if (saveTo.isFile()) {
+ saveTo = null;
+ System.out
+ .println("-t must specify a directory. Default value is used.");
+ }
+ break;
+ case 'n': allowTagging = false; break;
+ case 'f': allowFromCache = false; break;
+ } //switch
+ } //for
+ }
+
+ private static void initLogger() {
+ logger = Logger.getLogger(Logger.GLOBAL_LOGGER_NAME);
+ logger.setLevel(Level.ALL);
+ logger.setFilter(null);
+ logger.setUseParentHandlers(false);
+ Formatter fNotVerySimple = new Formatter() {
+ @Override
+ public String format(LogRecord record) {
+ Throwable a = record.getThrown();
+ if (a == null) return String.format(record.getMessage(), record.getParameters());
+ StringBuilder result = new StringBuilder(500);
+ result.append(record.getLevel().toString()+": "+record.getMessage());
+ result.append(a.toString()+"\n");
+ for (StackTraceElement trace: a.getStackTrace())
+ result.append("\tat "+trace.toString()+"\n");
+ return result.toString();
+ }
+ };
+ if (logToCon) {
+ Handler hConsole = new StreamHandler(System.out,fNotVerySimple) {
+ public void publish(LogRecord record) {
+ super.publish(record);
+ flush();
+ }
+ };
+ hConsole.setLevel(Level.ALL);
+ logger.addHandler(hConsole);
+ }
+ if (logToFile) {
+ try {
+ Handler hFile = new StreamHandler(new FileOutputStream(logFile),fNotVerySimple) {
+ public void publish(LogRecord record) {
+ super.publish(record);
+ flush();
+ }
+ };
+ hFile.setLevel(Level.ALL);
+ logger.addHandler(hFile);
+ } catch (SecurityException e) {
+ logger.log(Level.SEVERE, "", e);
+ } catch (IOException e) {
+ logger.log(Level.SEVERE, "", e);
+ }
+
+ }
+
+ }
+
+ public static void main(String[] args) {
+ parseCommandLine(args);
+ initLogger(); // --> logger
+ try {
+ cache = new XMLCache(xmlFileName);
+ logger.info( String.format(
+ "Starting to download\n from <%s>\n into <%s> with%s retagging existing files.%n",
+ baseURL, saveTo, allowTagging?"":"out"));
+
+ PageParser topElement = PageParser.detectPage(baseURL);
+ logger.info( " ------- Survey: -------\n");
+ PageParser.isUsingCache = allowFromCache;
+ topElement.acquireData(true, cache.doc); // always download root page.
+ cache.saveCache();
+
+ logger.info( " ------- Acquisition: -------\n");
+ topElement.saveResult(saveTo);
+ logger.info( String.format("On total: %d files saved from net (%d bytes) + %d pages viewed%n",
+ WebDownloader.totalFileDownloadFinished,
+ WebDownloader.totalBytesDownloaded,
+ WebDownloader.totalPageDownloadFinished
+ ));
+ } catch (Throwable e) {
+ logger.log(Level.SEVERE, "", e);
+ }
+ }
+
+}
412 src/PageParser.java
@@ -0,0 +1,412 @@
+import java.io.*;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.jaxen.JaxenException;
+import org.jaxen.XPath;
+import org.jaxen.jdom.JDOMXPath;
+import org.jdom.*;
+import org.jdom.filter.ElementFilter;
+import org.jdom.input.SAXBuilder;
+
+/**
+ * Basic class to download page, parse it and download elements it references.
+ */
+public abstract class PageParser {
+
+ /**
+ * Exception generated if data cannot be read from XML cache
+ * or downloaded document
+ * @author A.Cerbic
+ */
+ @SuppressWarnings("serial")
+ public static class ProblemsReadingDocumentException extends Exception {
+
+ public String problemDocumentURL;
+ public String parentDocumentURL; /** may be null */
+
+ public ProblemsReadingDocumentException() {
+ super();
+ }
+ public ProblemsReadingDocumentException(Throwable e) {
+ super(e);
+ }
+ }
+
+ /**
+ * title of this item (as stored into cache) - SHOULD NOT be null
+ */
+ public String title;
+ /**
+ * url of a page referencing this item - SHOULD NOT be null
+ */
+ public URL url;
+ /**
+ * array of a children items to this one if any (null otherwise)
+ */
+ public PageParser[] childPages;
+ /**
+ * reference to a parent item (may be null)
+ */
+ public PageParser parent;
+ /**
+ * use caching facility?
+ */
+ public static boolean isUsingCache;
+
+ /**
+ * Inherited by all descendants and instances, providing unified logging.
+ */
+ protected static Logger logger = Logger.getLogger(Logger.GLOBAL_LOGGER_NAME);;
+
+ /**
+ * Is called ONLY for Class.newInstance reason.
+ * Make sure setUrl(URL url) is called right after it.
+ */
+ public PageParser() {}
+
+ /**
+ * Constructs from web address
+ * @param stringURL - web address
+ * @throws IllegalArgumentException if stringURL is null or bad
+ */
+ public PageParser(String stringURL) throws IllegalArgumentException {
+ try {url = resolveLink(stringURL);}
+ catch (MalformedURLException e) {throw new IllegalArgumentException(e);}
+ catch (NullPointerException e) {throw new IllegalArgumentException(e);}
+ }
+
+ /**
+ * Constructs from given URL object
+ * @param _url - gets assigned by reference, not copied.
+ * @throws IllegalArgumentException if _url == null
+ */
+ public PageParser(URL _url) throws IllegalArgumentException {
+ if (_url == null) throw new IllegalArgumentException();
+ url = _url;
+ }
+
+ /**
+ * Fills this instance with data, reading it from cache or parsing web page.
+ * Then repeats process for children pages
+ * @param forceDownload - true if you want ignore cache on this one, all child nodes checks are
+ * controlled with <b>isUsingCache</b> flag
+ * @param doc - XML document storing cache on pages data.
+ */
+ public final void acquireData(boolean forceDownload, org.jdom.Document doc) throws ProblemsReadingDocumentException {
+ assert (url != null);
+ logger.info( String.format("(%s) Reading: %s ... %n",
+ this.getClass().getName(), url.toString()));
+
+ if (forceDownload || !loadFromCache(doc)) {
+ downloadPage();
+ saveToCache(doc);
+ }
+
+ if (childPages == null) {
+ logger.info(String.format("Reading %s done. %n", url.toString()));
+ return;
+ }
+
+ logger.info( String.format("Reading %s done. Children: %d%n", childPages.length));
+ for (int i = 0; i < childPages.length; i++)
+ try {
+ childPages[i].acquireData(!isUsingCache, doc);
+ } catch (ProblemsReadingDocumentException e) {
+ childPages[i] = null;
+ logger.log(Level.WARNING, "", e);
+ } //skip to the next child page
+ }
+
+ /**
+ * Detects page type by its URL address (String)
+ * @param baseURL - String representation of URL
+ * @return new PageParser descendant fitting for the page
+ * @throws IllegalArgumentException - when baseURL is bad or null
+ */
+ public static final PageParser detectPage(String baseURL) throws IllegalArgumentException {
+ if (baseURL.contains("/track/"))
+ return new Track(baseURL);
+ if (baseURL.contains("/album/"))
+ return new Album(baseURL);
+ return new Discography(baseURL);
+ }
+
+ /**
+ * Convert a string to a proper file name.
+ * (checks for Windows prohibited chars only)
+ * @param from - string to convert
+ * @return proper file name
+ * @throws IOException if file name is not valid
+ */
+ public static String getFSSafeName(String from) throws IOException {
+ assert (from!=null);
+ String s = new String(from);
+ for (char c : ":/\\*?\"<>|\t\n\r".toCharArray())
+ s = s.replace(c, ' ');
+
+ File f = new File (s);
+
+ try {
+ // very awkward test for file name validness
+ if (f.createNewFile())
+ f.delete();
+ return s;
+ } catch (IOException e) {
+ // OK lets go try-hard on this
+ int hash = 0;
+ for (char c: s.toCharArray())
+ hash = (hash + (int)c)*2;
+ s = String.valueOf(hash);
+
+ f = new File(s);
+ // very awkward test for file name validness again
+ if (f.createNewFile())
+ f.delete();
+ return s;
+ }
+ }
+
+ /**
+ * Downloads the page and creates child nodes.
+ * @throws ProblemsReadingDocumentException if any error
+ */
+ private void downloadPage() throws ProblemsReadingDocumentException {
+ logger.log(Level.FINE, "[net]");
+
+ Document doc = null;
+ try {
+ SAXBuilder builder = new SAXBuilder("org.ccil.cowan.tagsoup.Parser");
+ doc = builder.build(url.toString());
+ } catch (Exception e) {throw new ProblemsReadingDocumentException(e);}
+ WebDownloader.totalPageDownloadFinished++;
+
+ parseSelf(doc);
+
+ @SuppressWarnings("unchecked")
+ List<Element> result = (List<Element>) queryXPathList(getChildNodesXPath(), doc);
+ childPages = new PageParser[result.size()];
+ for (int i = 0; i<result.size(); i++) {
+ try {
+ childPages[i] = parseChild(result.get(i));
+ childPages[i].parent = this;
+ } catch (ProblemsReadingDocumentException e) {
+ logger.log(Level.WARNING, "unable to parse child data", e);
+ } // skip this child to next one
+ }
+ }
+
+ /**
+ * Returns an XPath string to get links to children pages
+ * from current page. All tags in the path are in "pre" namespace.
+ * @return the XPath string or null, if no children expected.
+ */
+ protected abstract String getChildNodesXPath();
+
+ /**
+ * Implemented by descendants to provide their own attributes for caching into XML.
+ * @return JDOM element with some custom data of this node.
+ */
+ protected abstract Element getSpecificDataXML() ;
+
+ /**
+ * Gets data about this page from cache (JDOM tree) and creates child nodes.
+ * @param doc - JDOM Document to load from
+ * @return true if data acquired successfully, false otherwise
+ */
+ private boolean loadFromCache (org.jdom.Document doc) {
+ assert (doc!= null);
+ logger.log(Level.FINE, "[cache]");
+ try {
+ Element e = scanXMLForThisElement(doc);
+ if (null == e) return false;
+ title = e.getAttributeValue("title");
+ readCacheSelf(e);
+
+ @SuppressWarnings("unchecked")
+ List<Element> l = e.getContent(new ElementFilter("childref"));
+ int size = l.size();
+ if (size>0) {
+ childPages = new PageParser[size];
+ Iterator<Element> itr = l.listIterator();
+ for (int i=0; i<size; i++) {
+ childPages[i] = readCacheChild(itr.next());
+ childPages[i].parent = this;
+ }
+ }
+ return true;
+ } catch (ProblemsReadingDocumentException e) {
+ // If ANY problem, quit with a fail code
+ childPages = null;
+ return false;
+ }
+
+ }
+
+ /**
+ * Extracts information from downloaded page about child pages
+ * @param element - fragment of a page containing data about a child page
+ * @return New child object
+ * @throws ProblemsReadingDocumentException if child cannot be created from this Element
+ */
+ protected abstract PageParser parseChild(Element element) throws ProblemsReadingDocumentException;
+
+ /**
+ * Extracts more information after a download about this page
+ * @param doc - JDOM Document of the page parsed
+ */
+ protected abstract void parseSelf(Document doc) throws ProblemsReadingDocumentException;
+
+ /**
+ * Queries given JDOM document with XPath string
+ * @param q - XPath string with all nodes in "pre" namespace
+ * @param doc - JDOM Document or Element
+ * @return List of found matches, may be of zero size if nothing is found
+ */
+ protected final List<?> queryXPathList(String q, Document doc) {
+ return queryXPathList (q, doc.getRootElement());
+ }
+
+ /**
+ * Queries given JDOM document with XPath string
+ * @param q - XPath string with all nodes in "pre" namespace
+ * @param doc - JDOM Document or Element
+ * @return List of found matches, may be of zero size if nothing is found
+ */
+ protected final List<?> queryXPathList(String q, Element doc) {
+ try {
+ String nsURI = doc.getNamespaceURI();
+ XPath xpath = new JDOMXPath(q);
+ xpath.addNamespace("pre", nsURI);
+ return xpath.selectNodes(doc);
+ } catch (JaxenException e) {
+ logger.log(Level.SEVERE,"",e);
+ return new ArrayList<Object>(0);
+ }
+ }
+
+ /**
+ * Reads child's page title/url data from cache and creates a node.
+ * @param e - <childref ...> tag describing a child
+ * @return new PageParser child node.
+ * @throws ProblemsReadingDocumentException if anything went wrong
+ */
+ private PageParser readCacheChild(Element e) throws ProblemsReadingDocumentException {
+ PageParser child = null;
+ try {
+ String u = e.getAttributeValue("url");
+ String c = e.getAttributeValue("class");
+ child = (PageParser)Class.forName(c).newInstance();
+ child.setUrl(u);
+ } catch (ClassNotFoundException e1) {
+ throw new ProblemsReadingDocumentException(e1);
+ } catch (InstantiationException e1) {
+ throw new ProblemsReadingDocumentException(e1);
+ } catch (IllegalAccessException e1) {
+ throw new ProblemsReadingDocumentException(e1);
+ } catch (MalformedURLException e1) {
+ throw new ProblemsReadingDocumentException(e1);
+ } catch (NullPointerException e1) {
+ throw new ProblemsReadingDocumentException(e1);
+ }
+ return child;
+
+ }
+
+ /**
+ * Reads class-specific info from XML cache element.
+ * @param e - element to read from
+ * @throws ProblemsReadingDocumentException if reading data from XML fails
+ */
+ protected abstract void readCacheSelf(Element e) throws ProblemsReadingDocumentException;
+
+ /**
+ * Gets an URL to resource referenced from this page.
+ * Uses this.url as a base link to resolve relative paths.
+ * @param link - relative or absolute link
+ * @return proper URL with absolute path
+ * @throws MalformedURLException
+ */
+ protected final URL resolveLink(String link) throws MalformedURLException {
+ return new URL(url, link);
+ }
+
+ /**
+ * Saves extracted data to disk, then saves children too.
+ * @param saveTo - directory to save info to.
+ * @throws IOException
+ */
+ public abstract void saveResult(File saveTo) throws IOException;
+
+ /**
+ * Saves this page data into XML tree.
+ * Only references to child pages are saved, not the pages data.
+ * @param doc - JDOM Document holding a cache to save to
+ */
+ private void saveToCache (org.jdom.Document doc) {
+ assert (doc != null);
+
+ // absolutely required fields
+ if (title==null || url==null)
+ return;
+ //1. Compose this one and childrefs
+ Element e = getSpecificDataXML();
+ if (e==null) return; // element is corrupt and should not be cached
+ e.setAttribute("title", title);
+ e.setAttribute("url", url.toString());
+ if (childPages != null)
+ for (PageParser child: childPages)
+ if (child != null) {
+ Element childElement = new Element("childref");
+ childElement.setAttribute("class",child.getClass().getName());
+ childElement.setAttribute("url",child.url.toString());
+ e.addContent(childElement);
+ }
+ //2. Drop old elements with the same URL
+ Element root = doc.getRootElement();
+
+ // TODO needs testing
+ @SuppressWarnings("unchecked")
+ List<Element> oldCachedElements = (List<Element>) queryXPathList(
+ String.format("//pre:%s[@url='%s']",e.getName(),url.toString()),
+ doc);
+ for (Element current: oldCachedElements)
+ current.detach();
+
+
+// Iterator<?> itr = root.getDescendants(new ElementFilter(e.getName()));
+// while (itr.hasNext()) {
+// Element current = (Element) itr.next();
+// if (current.getAttributeValue("url").equals(url.toString()))
+// itr.remove();
+// }
+
+ //3. Add this element to cache
+ root.addContent(e);
+ }
+
+ /**
+ * Scans XML tree to find the 1st Element eligible to read from
+ * @return found Element or null
+ */
+ private Element scanXMLForThisElement(org.jdom.Document doc) {
+ assert (doc != null); assert (url != null);
+ String searchXPath = String.format("//%s[@url='%s']",getClass().getName(),url.toString());
+ List<?> result = queryXPathList(searchXPath, doc);
+
+ return result.size()>0?(Element)result.get(0):null;
+ }
+ /**
+ * Fixes the url after no-argument constructor was called
+ * @param s - URL String to initialize from
+ * @throws MalformedURLException if s is bad or null
+ */
+ private void setUrl(String s) throws MalformedURLException {
+ url = resolveLink(s);
+ }
+}
4 src/StatisticGatherer.java
@@ -0,0 +1,4 @@
+
/**
 * Placeholder for download statistics collection.
 * NOTE(review): empty stub — either implement it (WebDownloader currently
 * keeps raw public static counters that could live here) or remove the class.
 */
public class StatisticGatherer {

}
224 src/Track.java
@@ -0,0 +1,224 @@
+import java.io.File;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.*;
+import java.util.logging.Level;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jdom.Document;
+import org.jdom.Element;
+
+import entagged.audioformats.AudioFile;
+import entagged.audioformats.AudioFileIO;
+import entagged.audioformats.exceptions.CannotReadException;
+import entagged.audioformats.exceptions.CannotWriteException;
+import entagged.audioformats.generic.TagField;
+import entagged.audioformats.mp3.util.id3frames.TextId3Frame;
+
+public class Track extends PageParser {
+
+ private Map<String, String> properties = new HashMap<String,String>();
+ public String getProperty(String name) {
+ // convention to PageParser
+ return name.equals("title")? title : properties.get(name) ;
+ }
+ public String setProperty(String name, String value) {
+ if (name.equals("title")) {
+ // convention to PageParser
+ String lastValue = title;
+ title = value;
+ return lastValue;
+ } else return properties.put(name, value);
+ }
+
+ // dataPatterns are to read Track info from downloaded page
+ private static final Map<String, Pattern> dataPatterns = new HashMap<String,Pattern>();
+ // tagToID3V2Frame are to save Track info to mp3 Tag
+ private static final Map<String,String> tagToID3V2Frame = new HashMap<String,String>();
+ /** XMLCacheDataKeys are keys to Track properties that are used by readXMLSelf() and getSpecificDataXML().<br/>
+ * "title" is not included because if you don't get it with loadFromCache() call,
+ * you will download and parse the full page anyway
+ */
+ private static final String[] XMLCacheDataKeys = {"mediaLink", "artist", "track", "album"};
+
+ static {
+ dataPatterns.put("mediaLink", Pattern.compile(".*trackinfo:.*\"file\":\"([^\"]*)\".*", Pattern.DOTALL));
+ dataPatterns.put("artist", Pattern.compile(".*artist\\s*:\\s*\"([^\"]*)\".*", Pattern.DOTALL));
+ dataPatterns.put("album", Pattern.compile(".*album_title\\s*:\\s*\"([^\"]*)\".*", Pattern.DOTALL));
+ dataPatterns.put("title", Pattern.compile(".*title\\s*:\\s*\"([^\"]*)\".*", Pattern.DOTALL));
+// dataPatterns.put("track", Pattern.compile(".*numtracks\\s*:\\s*([\\d]*).*", Pattern.DOTALL));
+ dataPatterns.put("comment", Pattern.compile(".*trackinfo:.*\"has_info\":\"([^\"]*)\".*", Pattern.DOTALL));
+
+ tagToID3V2Frame.put("title", "TIT2");
+ tagToID3V2Frame.put("track", "TRCK");
+ tagToID3V2Frame.put("album", "TALB");
+ tagToID3V2Frame.put("artist", "TPE1");
+ }
+
+ public Track(String s) throws IllegalArgumentException {super(s);}
+ public Track(URL url) throws IllegalArgumentException {super(url);}
+ public Track() {super();}
+
+ @Override
+ public void saveResult(File saveTo) throws IOException {
+ File f = new File(saveTo, getFSSafeName(title) + ".mp3");
+ if (f.isDirectory()) {
+ logger.info( "<"+title+"> is a directory!!!\n");
+ return;
+ }
+ logger.info( title+" ... ");
+ if (WebDownloader.fetchWebFile(getProperty("mediaLink"), f) == 0)
+ logger.info( "skipped.\n");
+ else
+ logger.info( "done.\n");
+
+ tagMp3File(f);
+ }
+
+ /**
+ * Checks the file and tags it if appropriate
+ * @param f - file to tag
+ */
+ private void tagMp3File(File f) {
+ try {
+ AudioFile mp3File = AudioFileIO.read(f);
+ entagged.audioformats.Tag mp3Tag = mp3File.getTag();
+
+ // this works around the bug in a lib, that causes drop all fields when
+ // generic (newly created by getTag()) Tag is converted to ID3v2 on commit()
+ if (!mp3Tag.getFields().hasNext()) {
+ mp3Tag.setAlbum("1");
+ mp3File.commit();
+ mp3File = AudioFileIO.read(f);
+ mp3Tag = mp3File.getTag();
+ mp3Tag.setAlbum("");
+ }
+
+ if (mp3Tag.getFirstTrack().equals("0")) {
+ @SuppressWarnings("rawtypes")
+ Iterator it = mp3Tag.getTrack().iterator();
+ while (it.hasNext()) {
+ it.next();
+ it.remove(); // the only way to remove field is by iterator
+ }
+ }
+
+ boolean updateMP3Tag = false;
+
+ // copy this Track's data into mp3Tag
+ for (Map.Entry<String, String> entry: tagToID3V2Frame.entrySet())
+ try {
+ String fieldValue = getProperty(entry.getKey());
+ if (!fieldValue.isEmpty()) {
+ TagField x = new TextId3Frame(entry.getValue(), fieldValue);
+ if (Main.allowTagging) {
+ // always rewrite with new value
+ mp3Tag.set(x);
+ updateMP3Tag = true;
+ }
+ else {
+ // rewrite only absent or empty tags.
+ @SuppressWarnings("unchecked")
+ List<TagField> idFieldSet = mp3Tag.get(entry.getValue());
+ if (idFieldSet == null || idFieldSet.size()==0 ||
+ idFieldSet.get(0) == null || idFieldSet.get(0).isEmpty()) {
+
+ mp3Tag.set(x);
+ updateMP3Tag = true;
+ }
+ }
+ }
+ } catch (NullPointerException e) {} // skip Track missing field
+
+
+ if (updateMP3Tag)
+ mp3File.commit();
+
+ } catch (CannotReadException e) {
+ logger.log(Level.SEVERE, "", e);
+ } catch (CannotWriteException e) {
+ logger.log(Level.SEVERE, "", e);
+ }
+ }
+
+ @Override
+ protected void readCacheSelf(Element e) throws ProblemsReadingDocumentException {
+ for (String key: Arrays.asList(XMLCacheDataKeys)) {
+ String value = e.getAttributeValue(key);
+ if (value==null) throw new ProblemsReadingDocumentException();
+ setProperty(key,value);
+ }
+ }
+
+ @Override
+ protected Element getSpecificDataXML() {
+ if (getProperty("mediaLink") == null) return null; //no saving track data if no track present
+
+ Element e = new Element("Track");
+ for (String key: Arrays.asList(XMLCacheDataKeys)) {
+ String value = getProperty(key);
+ if (value==null) value = "";
+ e.setAttribute(key, value);
+ }
+ return e;
+ }
+
+ @Override
+ protected PageParser parseChild(Element element)
+ throws ProblemsReadingDocumentException {
+ return null; // stub since no child nodes XPath and this will never be called
+ }
+
+ @Override
+ protected void parseSelf(Document doc)
+ throws ProblemsReadingDocumentException {
+ @SuppressWarnings("unchecked")
+ List<Element> scriptList = (List<Element>) queryXPathList("//pre:div[@id='pgBd']/pre:script", doc);
+ for (Element el: scriptList) {
+ String rawData = el.getText();
+ // clear JavaScript escaping: "\/" --> "/", etc.
+ rawData = rawData.replaceAll("\\\\(.)", "$1");
+
+ // try to recover each tag by its pattern
+ for (Map.Entry<String, Pattern> entry: dataPatterns.entrySet()) {
+ Matcher m = entry.getValue().matcher(rawData);
+ if (m.matches())
+ setProperty(entry.getKey(), m.group(1));
+ }
+
+ }
+ // fix url
+ String relativePath = getProperty("mediaLink");
+ try {
+ setProperty("mediaLink", resolveLink(relativePath).toString());
+ } catch (MalformedURLException e) {
+ setProperty("mediaLink", null);
+ throw new ProblemsReadingDocumentException(e);
+ }
+
+ try {
+ // if album or artist data is missing we can try to salvage it
+ // from parenting pages
+ String album = getProperty("album");
+ String artist = getProperty("artist");
+ if (album==null || album.isEmpty())
+ setProperty("album",parent.title);
+ if (artist==null || artist.isEmpty())
+ setProperty("artist",parent.parent.title);
+ // fix track number
+ if (Integer.parseInt(getProperty("track"))<=0)
+ properties.remove("track");
+ } catch (NullPointerException e) {
+ } catch (NumberFormatException e) {
+ properties.remove("track");
+ }
+
+ }
+
+ @Override
+ protected String getChildNodesXPath() {
+ return null;
+ }
+}
114 src/WebDownloader.java
@@ -0,0 +1,114 @@
+import java.io.*;
+import java.net.URL;
+import java.net.URLConnection;
+
/**
 * Helps downloading resources from the net, both html pages and other files.
 * All state is static; the class is a utility holder plus raw counters.
 * @author A.Cerbic
 */
public class WebDownloader {

	public final static int MAX_LENGTH_CHECKS = 5;
	public static int lastCheckWebLength = 0;
	public static URL lastCheckWebURL = null;

	/* statistics section */
	public static int totalLengthChecks = 0;
	public static int totalFileDownloadAttempts = 0;
	public static int totalFileDownloadFinished = 0;
	public static int totalPageDownloadFinished = 0;
	public static long totalBytesDownloaded = 0;

	/**
	 * Downloads and saves a resource by given string address (URL).
	 * @param from - resource url address
	 * @param to - File to save to.
	 * @return size of the downloaded file in bytes,
	 * 0 if download was skipped (file exists and not zero-length or server has responded bad)
	 * @throws FileNotFoundException if can not open file for writing
	 * @throws IOException on other stream problems
	 */
	public static long fetchWebFile(String from, File to) throws FileNotFoundException, IOException {
		URL u = new URL(from);
		return fetchWebFile(u,to);
	}

	/**
	 * Downloads and saves a resource by given URL.
	 * @param from - page address (HTTP assumed)
	 * @param to - file to save the downloaded resource
	 * @return size of the downloaded file in bytes,
	 * 0 if download was skipped (file exists and not zero-length or server has responded bad)
	 * @throws FileNotFoundException if can not open file for writing
	 * @throws IOException on other stream problems
	 */
	public static long fetchWebFile(URL from, File to) throws FileNotFoundException, IOException {
		totalFileDownloadAttempts++;

		URLConnection connection = from.openConnection();
		// check content type header to catch 404, 403... error responses;
		// getContentType() may be null when the server sends no header
		String contentType = connection.getContentType();
		if (contentType != null && contentType.contains("text/"))
			return 0;

		/* just delete zero-length files. if size is non-zero, skip it */
		if (to.isFile() && (to.length() > 0))
			return 0;
		to.delete();

		BufferedInputStream bis = null;
		BufferedOutputStream bos = null;
		try {
			bis = new BufferedInputStream(connection.getInputStream());
			bos = new BufferedOutputStream(new FileOutputStream(to));

			byte[] buff = new byte[1024 * 100];
			int numRead;
			while ((numRead = bis.read(buff)) != -1)
				bos.write(buff, 0, numRead);
		} catch (IOException e) {
			// failed while opening or writing: remove the partial file before rethrowing
			to.delete();
			throw e;
		} finally {
			// close both streams even if closing the first one throws
			try {
				if (bis != null) bis.close();
			} finally {
				if (bos != null) bos.close();
			}
		}
		totalFileDownloadFinished++;
		totalBytesDownloaded += to.length();
		return to.length();
	}
}
41 src/XMLCache.java
@@ -0,0 +1,41 @@
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import org.jdom.Document;
+import org.jdom.Element;
+import org.jdom.output.Format;
+import org.jdom.output.XMLOutputter;
+
+public class XMLCache {
+ public XMLOutputter outputter;
+ public File xmlFile;
+ public Document doc;
+
+ public XMLCache(String xmlFileName) throws IOException {
+ try {
+ xmlFile = new File(xmlFileName);
+ if (xmlFile.exists()) {
+ org.jdom.input.SAXBuilder builder = new org.jdom.input.SAXBuilder();
+ builder.setIgnoringBoundaryWhitespace(true);
+ builder.setIgnoringElementContentWhitespace(true);
+ doc = builder.build(xmlFile);
+ }
+ } catch (Exception e) {}
+
+ if (doc == null) doc = new Document(new Element("root"));
+ outputter = new XMLOutputter();
+ Format xmlOutputFormat = outputter.getFormat();
+ xmlOutputFormat.setIndent(" ");
+ xmlOutputFormat.setLineSeparator("\n");
+ outputter.setFormat(xmlOutputFormat);
+ }
+
+
+ /**
+ * Saves XML cache back into a file
+ * @throws IOException
+ */
+ public void saveCache() throws IOException {
+ outputter.output(doc, new FileOutputStream(xmlFile, false));
+ }
+}
3 time_measure.cmd
@@ -0,0 +1,3 @@
rem Crude wall-clock measurement: log start time, run the downloader, log end time.
echo Starting at %time% >> measure.log
Bandcamp_downloader.cmd -tD:\temp\
echo Ending at %time% >> measure.log

0 comments on commit 8f59567

Please sign in to comment.