Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move cli application to lib and improve cov tool.
- Loading branch information
EvgeneOskin
committed
Oct 20, 2015
1 parent
412423a
commit 33ebc48
Showing
3 changed files
with
141 additions
and
133 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
SimpleCov.start do | ||
add_filter "/spec/" | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,136 +1,5 @@ | ||
#!/usr/bin/env ruby | ||
require 'rubygems' | ||
require 'commander' | ||
require 'parallel' | ||
require 'net/http' | ||
require 'mathnet/crawler' | ||
require 'exponential_backoff' | ||
require 'mathnet/cli' | ||
|
||
# CLI for search and download articles from mathnet site. | ||
class MathnetApplication | ||
include Commander::Methods | ||
|
||
# Initialize constances. | ||
def initialize | ||
@minimal_interval = 1.0 | ||
@maximal_elapsed_time = 600.0 | ||
@base_dir = 'mathnet' | ||
end | ||
|
||
# Process programm call via command line. | ||
def run | ||
program :name, 'Mathnet crawler.' | ||
program :version, Mathnet::Crawler::VERSION | ||
program :description, 'Command that act like missed mathnet client.' | ||
add_download_all_command | ||
add_articles_command | ||
add_journals_command | ||
run! | ||
end | ||
|
||
def add_download_all_command | ||
command :'download all' do |c| | ||
c.description = 'Download all articals.' | ||
c.action do | ||
journals = list_journals | ||
issues = list_issues journals | ||
articles = list_articals issues | ||
download_aricles articles | ||
end | ||
end | ||
end | ||
|
||
def add_articles_command | ||
command :articles do |c| | ||
c.description = 'List articles per journal.' | ||
c.action do | ||
articles = list_articals list_issues list_journals | ||
articles.each do |article| | ||
say "#{article.journal_title}/#{article.title}" | ||
end | ||
end | ||
end | ||
end | ||
|
||
def add_journals_command | ||
command :journals do |c| | ||
c.description = 'List journals on mathnet.' | ||
c.action do | ||
journals = list_journals | ||
journals.each do |journal| | ||
say journal.title | ||
end | ||
end | ||
end | ||
end | ||
|
||
# Return all journals published on mathnet. | ||
def list_journals | ||
process_backoff do | ||
Mathnet::Crawler::Journal.list Mathnet::Crawler::Library.new | ||
end | ||
end | ||
|
||
# Return all issues that existing in passed journals. | ||
# @param journals [Array] of [Mathnet::Crawler::Journals] that having issues. | ||
def list_issues(journals) | ||
issues_lists = Parallel.map(journals, progress: 'List issues') do |journal| | ||
process_backoff { Mathnet::Crawler::Issue.list journal } | ||
end | ||
issues_lists.reduce do |initial, item| | ||
initial + item | ||
end | ||
end | ||
|
||
# Return all articles that existing in passed issues. | ||
# @param issues [Array] of [Mathnet::Crawler::Issue] that having articles. | ||
def list_articals(issues) | ||
articals_lists = Parallel.map(issues, progress: 'List articals') do |issue| | ||
process_backoff { Mathnet::Crawler::Article.list issue } | ||
end | ||
articals_lists.reduce do |initial, item| | ||
(item && initial + item) || initial | ||
end | ||
end | ||
|
||
# Store full texts of passed articles. | ||
# @param articles [Array] of [Mathnet::Crawler::Article] that having | ||
# full texts. | ||
def download_aricles(articles) | ||
Parallel.each(articles, progress: 'Download texts') do |article| | ||
pdf_path = article_path article | ||
process_backoff do | ||
article.full_text do |body| | ||
pdf = File.new pdf_path, 'w' | ||
pdf.write body | ||
pdf.close | ||
end | ||
end | ||
end | ||
end | ||
|
||
# Execut block with exponential backoff | ||
# @param &block [block] do http request and if http error occured, | ||
# we would retry it. | ||
def process_backoff(&block) | ||
backoff.until_success do | ||
begin | ||
block.call | ||
rescue Net::HTTPServerException | ||
false | ||
end | ||
end | ||
end | ||
|
||
def article_path(article) | ||
directory = File.join @base_dir, article.journal_title | ||
FileUtils.mkdir_p directory if Dir.exist?(directory) | ||
File.join directory, "#{article.title}.pdf" | ||
end | ||
|
||
def backoff | ||
ExponentialBackoff.new @minimal_interval, @maximal_elapsed_time | ||
end | ||
end | ||
|
||
MathnetApplication.new.run if $PROGRAM_NAME == __FILE__ | ||
Mathnet::Application.new.run if $PROGRAM_NAME == __FILE__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
#!/usr/bin/env ruby | ||
require 'rubygems' | ||
require 'commander' | ||
require 'parallel' | ||
require 'net/http' | ||
require 'mathnet/crawler' | ||
require 'exponential_backoff' | ||
|
||
module Mathnet | ||
# CLI for search and download articles from mathnet site. | ||
class Application | ||
include Commander::Methods | ||
|
||
# Initialize constances. | ||
def initialize | ||
@minimal_interval = 1.0 | ||
@maximal_elapsed_time = 600.0 | ||
@base_dir = 'mathnet' | ||
end | ||
|
||
# Process programm call via command line. | ||
def run | ||
program :name, 'Mathnet crawler.' | ||
program :version, Crawler::VERSION | ||
program :description, 'Command that act like missed mathnet client.' | ||
add_download_all_command | ||
add_articles_command | ||
add_journals_command | ||
run! | ||
end | ||
|
||
def add_download_all_command | ||
command :'download all' do |c| | ||
c.description = 'Download all articals.' | ||
c.action do | ||
journals = list_journals | ||
issues = list_issues journals | ||
articles = list_articals issues | ||
download_aricles articles | ||
end | ||
end | ||
end | ||
|
||
def add_articles_command | ||
command :articles do |c| | ||
c.description = 'List articles per journal.' | ||
c.action do | ||
articles = list_articals list_issues list_journals | ||
articles.each do |article| | ||
say "#{article.journal_title}/#{article.title}" | ||
end | ||
end | ||
end | ||
end | ||
|
||
def add_journals_command | ||
command :journals do |c| | ||
c.description = 'List journals on mathnet.' | ||
c.action do | ||
journals = list_journals | ||
journals.each do |journal| | ||
say journal.title | ||
end | ||
end | ||
end | ||
end | ||
|
||
# Return all journals published on mathnet. | ||
def list_journals | ||
process_backoff do | ||
Crawler::Journal.list Crawler::Library.new | ||
end | ||
end | ||
|
||
# Return all issues that existing in passed journals. | ||
# @param journals [Array] of [Mathnet::Crawler::Journals] with issues. | ||
def list_issues(journals) | ||
issues_lists = Parallel.map(journals, progress: 'List issues') do |j| | ||
process_backoff { Crawler::Issue.list j } | ||
end | ||
issues_lists.reduce do |initial, item| | ||
initial + item | ||
end | ||
end | ||
|
||
# Return all articles that existing in passed issues. | ||
# @param issues [Array] of [Mathnet::Crawler::Issue] that having articles. | ||
def list_articals(issues) | ||
articals_lists = Parallel.map(issues, progress: 'List articals') do |i| | ||
process_backoff { Crawler::Article.list i } | ||
end | ||
articals_lists.reduce do |initial, item| | ||
(item && initial + item) || initial | ||
end | ||
end | ||
|
||
# Store full texts of passed articles. | ||
# @param articles [Array] of [Mathnet::Crawler::Article] that having | ||
# full texts. | ||
def download_aricles(articles) | ||
Parallel.each(articles, progress: 'Download texts') do |article| | ||
pdf_path = article_path article | ||
process_backoff do | ||
article.full_text do |body| | ||
pdf = File.new pdf_path, 'w' | ||
pdf.write body | ||
pdf.close | ||
end | ||
end | ||
end | ||
end | ||
|
||
# Execut block with exponential backoff | ||
# @param &block [block] do http request and if http error occured, | ||
# we would retry it. | ||
def process_backoff(&block) | ||
backoff.until_success do | ||
begin | ||
block.call | ||
rescue Net::HTTPServerException | ||
false | ||
end | ||
end | ||
end | ||
|
||
def article_path(article) | ||
directory = File.join @base_dir, article.journal_title | ||
FileUtils.mkdir_p directory if Dir.exist?(directory) | ||
File.join directory, "#{article.title}.pdf" | ||
end | ||
|
||
def backoff | ||
ExponentialBackoff.new @minimal_interval, @maximal_elapsed_time | ||
end | ||
end | ||
end |