Skip to content

Commit

Permalink
add more free url & fix bug
Browse files Browse the repository at this point in the history
  • Loading branch information
Jiezhi committed May 22, 2016
1 parent 3d07b22 commit e85f8c8
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 6 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1,2 +1,3 @@

.idea
oreilly
29 changes: 23 additions & 6 deletions oreilly_free.py
@@ -35,7 +35,10 @@ def download_file(url):
:return: The downloaded file name
"""
local_filename = url.split('/')[-1]
local_filename = os.path.join('oreilly', local_filename)
dir_name = 'oreilly' + os.path.sep + url.split('/')[-4]
if not os.path.exists(dir_name):
os.makedirs(dir_name)
local_filename = os.path.join(dir_name, local_filename)
if os.path.exists(local_filename):
print('file already downloaded: ', local_filename)
return local_filename
@@ -62,13 +65,27 @@ def get_free_book(html):
books = soup.find_all('a', {'data-toggle': 'popover'})
print('Find %d book(s)...', len(books))
for book in books:
book_name = get_keyword(book['href'], 'free/', '.csp')
href = book['href']
if not href or 'player.oreilly.com' in href:
print("it's a video page, igored: ", href)
continue
book_name = get_keyword(href, 'free/', '.csp')
book_url = 'http://www.oreilly.com/programming/free/files/%s.pdf' % book_name
print(book_url)
download_file(book_url)
book_url = 'http://www.oreilly.com/programming/free/files/%s.mobi' % book_name
download_file(book_url)


if __name__ == '__main__':
free_oreilly = 'http://www.oreilly.com/programming/free/'
html = requests.get(free_oreilly)
get_free_book(html)
free_oreilly = ['http://www.oreilly.com/programming/free/',
'http://www.oreilly.com/web-platform/free/',
'http://www.oreilly.com/security/free/',
'http://www.oreilly.com/business/free/',
'http://www.oreilly.com/data/free/',
'http://www.oreilly.com/iot/free/',
'http://www.oreilly.com/design/free/',
'http://www.oreilly.com/webops-perf/free/',
]
for free in free_oreilly:
html = requests.get(free)
get_free_book(html.content)

0 comments on commit e85f8c8

Please sign in to comment.