Skip to content

Commit

Permalink
v20151112
Browse files Browse the repository at this point in the history
  • Loading branch information
Nandaka committed Nov 12, 2015
1 parent e9eb814 commit 44a4e5e
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 8 deletions.
20 changes: 20 additions & 0 deletions PixivConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ class PixivConfig:
enableInfiniteLoop = False
verifyImage = False
writeUrlInDescription = False
urlBlacklistRegex = ""
urlDumpFilename = "url_list_%Y%m%d"

# IrfanView
createDownloadLists = False
Expand Down Expand Up @@ -423,6 +425,20 @@ def loadConfig(self, path=None):
self.writeUrlInDescription = False
haveError = True

try:
self.urlBlacklistRegex = config.get('Settings','urlBlacklistRegex')
except ValueError:
print "urlBlacklistRegex = "
self.urlBlacklistRegex = ""
haveError = True

try:
self.urlDumpFilename = config.get('Settings','urlDumpFilename')
except ValueError:
print "urlDumpFilename = url_list_%Y%m%d"
self.urlDumpFilename = "url_list_%Y%m%d"
haveError = True

## except ConfigParser.NoOptionError:
## print 'Error at loadConfig():',sys.exc_info()
## print 'Failed to read configuration.'
Expand Down Expand Up @@ -499,6 +515,8 @@ def writeConfig(self, error=False, path=None):
config.set('Settings', 'enableInfiniteLoop', self.enableInfiniteLoop)
config.set('Settings', 'verifyImage', self.verifyImage)
config.set('Settings', 'writeUrlInDescription', self.writeUrlInDescription)
config.set('Settings', 'urlBlacklistRegex', self.urlBlacklistRegex)
config.set('Settings', 'urlDumpFilename', self.urlDumpFilename)

config.add_section('Authentication')
config.set('Authentication', 'username', self.username)
Expand Down Expand Up @@ -595,6 +613,8 @@ def printConfig(self):
print ' - enableInfiniteLoop =', self.enableInfiniteLoop
print ' - verifyImage =', self.verifyImage
print ' - writeUrlInDescription =', self.writeUrlInDescription
print ' - urlBlacklistRegex =', self.urlBlacklistRegex
print ' - urlDumpFilename =', self.urlDumpFilename

print ' [Pixiv]'
print ' - numberOfPage =', self.numberOfPage
Expand Down
2 changes: 1 addition & 1 deletion PixivConstant.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: UTF-8 -*-
# pylint: disable=I0011, C, C0302

PIXIVUTIL_VERSION = '20151026-beta'
PIXIVUTIL_VERSION = '20151112'
PIXIVUTIL_LINK = 'https://nandaka.wordpress.com/tag/pixiv-downloader/'
PIXIV_URL = 'http://www.pixiv.net'
PIXIV_URL_SSL = 'https://www.secure.pixiv.net/login.php'
Expand Down
21 changes: 18 additions & 3 deletions PixivHelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,12 +602,27 @@ def generateSearchTagUrl(tags, page, title_caption, wild_card, oldest_first,

return url

def writeUrlInDescription(image, blacklistRegex, filenamePattern):
    """Append the urls found in an image's description to a dump file.

    image           -- object exposing ``descriptionUrlList`` (list of url
                       strings) and ``imageId`` attributes.
    blacklistRegex  -- regular expression; urls matching it are skipped.
                       Empty string disables filtering.
    filenamePattern -- strftime() pattern for the dump filename (".txt" is
                       appended). Empty string falls back to the default
                       "url_list_%Y%m%d".

    Writes nothing when the image has no urls or all of them were
    filtered out.
    """
    if len(image.descriptionUrlList) == 0:
        return

    # Filter out blacklisted urls first.
    if len(blacklistRegex) > 0:
        pattern = re.compile(blacklistRegex)  # compile once, not per url
        valid_url = [link for link in image.descriptionUrlList
                     if pattern.search(link) is None]
    else:
        valid_url = image.descriptionUrlList

    # Then append the surviving urls, tagged with the image id.
    if len(valid_url) > 0:
        if len(filenamePattern) == 0:
            filenamePattern = "url_list_%Y%m%d"
        filename = datetime.date.today().strftime(filenamePattern) + ".txt"

        # 'with' guarantees the handle is closed even if a write raises
        # (the original leaked the file object on error).
        with codecs.open(filename, 'a', encoding='utf-8') as info:
            info.write("#" + str(image.imageId) + "\r\n")
            for link in valid_url:
                info.write(link + "\r\n")

2 changes: 1 addition & 1 deletion PixivUtil2.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ def process_image(mode, artist=None, image_id=None, user_dir='', bookmark=False,
PixivHelper.printAndLog('info', "Deleting zip file => " + filename)
os.remove(filename)
if __config__.writeUrlInDescription:
PixivHelper.writeUrlInDescription(image)
PixivHelper.writeUrlInDescription(image, __config__.urlBlacklistRegex, __config__.urlDumpFilename)


# Only save to db if all images is downloaded completely
Expand Down
7 changes: 5 additions & 2 deletions changelog.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
20151026-beta:
20151112:
- Fix Issue #96: page 100 is not downloaded for new illust page.
- Update test page.
- Implement Feature #95: dump url list to text file. Set writeUrlInDescription = True to enable.
- Implement Feature #95: dump url list to text file.
Set writeUrlInDescription = True to enable.
Set urlBlacklistRegex to define url to ignore.
Set urlDumpFilename to define the dump filename, use python strftime() format.

20151019:
- Update proxy handler.
Expand Down
5 changes: 4 additions & 1 deletion readme.txt
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,10 @@ verifyimage ==> Do image and zip checking after download. Set the value to
writeUrlInDescription ==> Write all url found in the image description to a text
file. Set to True to enable. The list will be saved to
the application folder as url_list_<timestamp>.txt

urlBlacklistRegex ==> Used to filter out the url in the description using
regular expression.
urlDumpFilename ==> Define the dump filename, use python strftime() format.
Default value is 'url_list_%Y%m%d'

=================================================================================
= list.txt Format =
Expand Down

0 comments on commit 44a4e5e

Please sign in to comment.