Skip to content

Commit

Permalink
v20151112
Browse files Browse the repository at this point in the history
  • Loading branch information
Nandaka committed Nov 12, 2015
1 parent e9eb814 commit 44a4e5e
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 8 deletions.
20 changes: 20 additions & 0 deletions PixivConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ class PixivConfig:
enableInfiniteLoop = False
verifyImage = False
writeUrlInDescription = False
urlBlacklistRegex = ""
urlDumpFilename = "url_list_%Y%m%d"

# IrfanView
createDownloadLists = False
Expand Down Expand Up @@ -423,6 +425,20 @@ def loadConfig(self, path=None):
self.writeUrlInDescription = False
haveError = True

try:
self.urlBlacklistRegex = config.get('Settings','urlBlacklistRegex')
except ValueError:
print "urlBlacklistRegex = "
self.urlBlacklistRegex = ""
haveError = True

try:
self.urlDumpFilename = config.get('Settings','urlDumpFilename')
except ValueError:
print "urlDumpFilename = url_list_%Y%m%d"
self.urlDumpFilename = "url_list_%Y%m%d"
haveError = True

## except ConfigParser.NoOptionError:
## print 'Error at loadConfig():',sys.exc_info()
## print 'Failed to read configuration.'
Expand Down Expand Up @@ -499,6 +515,8 @@ def writeConfig(self, error=False, path=None):
config.set('Settings', 'enableInfiniteLoop', self.enableInfiniteLoop)
config.set('Settings', 'verifyImage', self.verifyImage)
config.set('Settings', 'writeUrlInDescription', self.writeUrlInDescription)
config.set('Settings', 'urlBlacklistRegex', self.urlBlacklistRegex)
config.set('Settings', 'urlDumpFilename', self.urlDumpFilename)

config.add_section('Authentication')
config.set('Authentication', 'username', self.username)
Expand Down Expand Up @@ -595,6 +613,8 @@ def printConfig(self):
print ' - enableInfiniteLoop =', self.enableInfiniteLoop
print ' - verifyImage =', self.verifyImage
print ' - writeUrlInDescription =', self.writeUrlInDescription
print ' - urlBlacklistRegex =', self.urlBlacklistRegex
print ' - urlDumpFilename =', self.urlDumpFilename

print ' [Pixiv]'
print ' - numberOfPage =', self.numberOfPage
Expand Down
2 changes: 1 addition & 1 deletion PixivConstant.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: UTF-8 -*-
# pylint: disable=I0011, C, C0302

PIXIVUTIL_VERSION = '20151026-beta'
PIXIVUTIL_VERSION = '20151112'
PIXIVUTIL_LINK = 'https://nandaka.wordpress.com/tag/pixiv-downloader/'
PIXIV_URL = 'http://www.pixiv.net'
PIXIV_URL_SSL = 'https://www.secure.pixiv.net/login.php'
Expand Down
21 changes: 18 additions & 3 deletions PixivHelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,12 +602,27 @@ def generateSearchTagUrl(tags, page, title_caption, wild_card, oldest_first,

return url

def writeUrlInDescription(image, blacklistRegex, filenamePattern):
    """Append the urls found in an image's description to a dump file.

    image           -- object exposing ``descriptionUrlList`` (list of url
                       strings) and ``imageId`` attributes.
    blacklistRegex  -- regular expression; urls matching it are skipped.
                       Empty string disables filtering.
    filenamePattern -- strftime() pattern for the dump filename (".txt" is
                       appended). Empty string falls back to the default
                       "url_list_%Y%m%d".

    Writes nothing when the image has no urls or all of them were
    filtered out.
    """
    if len(image.descriptionUrlList) == 0:
        return

    # Filter out blacklisted urls first.
    if len(blacklistRegex) > 0:
        pattern = re.compile(blacklistRegex)  # compile once, not per url
        valid_url = [link for link in image.descriptionUrlList
                     if pattern.search(link) is None]
    else:
        valid_url = image.descriptionUrlList

    # Then append the surviving urls, tagged with the image id.
    if len(valid_url) > 0:
        if len(filenamePattern) == 0:
            filenamePattern = "url_list_%Y%m%d"
        filename = datetime.date.today().strftime(filenamePattern) + ".txt"

        # 'with' guarantees the handle is closed even if a write raises
        # (the original leaked the file object on error).
        with codecs.open(filename, 'a', encoding='utf-8') as info:
            info.write("#" + str(image.imageId) + "\r\n")
            for link in valid_url:
                info.write(link + "\r\n")

2 changes: 1 addition & 1 deletion PixivUtil2.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ def process_image(mode, artist=None, image_id=None, user_dir='', bookmark=False,
PixivHelper.printAndLog('info', "Deleting zip file => " + filename)
os.remove(filename)
if __config__.writeUrlInDescription:
PixivHelper.writeUrlInDescription(image)
PixivHelper.writeUrlInDescription(image, __config__.urlBlacklistRegex, __config__.urlDumpFilename)


# Only save to db if all images is downloaded completely
Expand Down
7 changes: 5 additions & 2 deletions changelog.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
20151026-beta:
20151112:
- Fix Issue #96: page 100 is not downloaded for new illust page.
- Update test page.
- Implement Feature #95: dump url list to text file. Set writeUrlInDescription = True to enable.
- Implement Feature #95: dump url list to text file.
Set writeUrlInDescription = True to enable.
Set urlBlacklistRegex to define url to ignore.
Set urlDumpFilename to define the dump filename, use python strftime() format.

20151019:
- Update proxy handler.
Expand Down
5 changes: 4 additions & 1 deletion readme.txt
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,10 @@ verifyimage ==> Do image and zip checking after download. Set the value to
writeUrlInDescription ==> Write all url found in the image description to a text
file. Set to True to enable. The list will be saved to
the application folder as url_list_<timestamp>.txt

urlBlacklistRegex ==> Used to filter out the url in the description using
regular expression.
urlDumpFilename ==> Define the dump filename, use python strftime() format.
Default value is 'url_list_%Y%m%d'

=================================================================================
= list.txt Format =
Expand Down

0 comments on commit 44a4e5e

Please sign in to comment.