From 8ce9860590ec30409939fbf7b61325964e1fe6bb Mon Sep 17 00:00:00 2001
From: David Yip
Date: Fri, 6 Sep 2013 00:06:25 -0500
Subject: [PATCH] ArchiveBot spike.

---
Review notes (text in this section is not part of the commit):
 * brain.rb hands Sidekiq the URL as a String instead of a URI object.
 * bot.rb rejects a --server value that has no host and falls back to port
   6667 when the URI names no port.
 * Comments were added to archive.rb, bot.rb, brain.rb and job_tracking.rb;
   the blob ids on their index lines still name the pre-review contents.

 Gemfile         |  9 ++++++++
 Gemfile.lock    | 60 +++++++++++++++++++++++++++++++++++++++++++++++++
 archive.rb      | 14 ++++++++++++
 bot.rb          | 50 +++++++++++++++++++++++++++++++++++++++++
 brain.rb        | 60 +++++++++++++++++++++++++++++++++++++++++++++++++
 job_tracking.rb | 26 +++++++++++++++++++++
 6 files changed, 219 insertions(+)
 create mode 100644 Gemfile
 create mode 100644 Gemfile.lock
 create mode 100644 archive.rb
 create mode 100644 bot.rb
 create mode 100644 brain.rb
 create mode 100644 job_tracking.rb

diff --git a/Gemfile b/Gemfile
new file mode 100644
index 00000000..38f1e1b0
--- /dev/null
+++ b/Gemfile
@@ -0,0 +1,9 @@
+source "https://rubygems.org"
+
+gem 'capybara'
+gem 'cinch', :git => 'https://github.com/cinchrb/cinch.git'
+gem 'poltergeist'
+gem 'redis-namespace'
+gem 'sidekiq'
+gem 'trollop'
+gem 'uuidtools'
diff --git a/Gemfile.lock b/Gemfile.lock
new file mode 100644
index 00000000..fc0bae03
--- /dev/null
+++ b/Gemfile.lock
@@ -0,0 +1,60 @@
+GIT
+  remote: https://github.com/cinchrb/cinch.git
+  revision: a78bfad4adea7498fcfefc2f1acf84d5e70bcb9c
+  specs:
+    cinch (2.0.9)
+
+GEM
+  remote: https://rubygems.org/
+  specs:
+    capybara (2.1.0)
+      mime-types (>= 1.16)
+      nokogiri (>= 1.3.3)
+      rack (>= 1.0.0)
+      rack-test (>= 0.5.4)
+      xpath (~> 2.0)
+    celluloid (0.15.0)
+      timers (~> 1.1.0)
+    cliver (0.2.1)
+    connection_pool (1.1.0)
+    json (1.8.0)
+    mime-types (1.25)
+    mini_portile (0.5.1)
+    multi_json (1.7.9)
+    nokogiri (1.6.0)
+      mini_portile (~> 0.5.0)
+    poltergeist (1.4.1)
+      capybara (~> 2.1.0)
+      cliver (~> 0.2.1)
+      multi_json (~> 1.0)
+      websocket-driver (>= 0.2.0)
+    rack (1.5.2)
+    rack-test (0.6.2)
+      rack (>= 1.0)
+    redis (3.0.4)
+    redis-namespace (1.3.1)
+      redis (~> 3.0.0)
+    sidekiq (2.14.0)
+      celluloid (>= 0.14.1)
+      connection_pool (>= 1.0.0)
+      json
+      redis (>= 3.0.4)
+      redis-namespace
+    timers (1.1.0)
+    trollop (2.0)
+    uuidtools (2.1.4)
+    websocket-driver (0.2.3)
+    xpath (2.0.0)
+      nokogiri (~> 1.3)
+
+PLATFORMS
+  ruby
+
+DEPENDENCIES
+  capybara
+  cinch!
+  poltergeist
+  redis-namespace
+  sidekiq
+  trollop
+  uuidtools
diff --git a/archive.rb b/archive.rb
new file mode 100644
index 00000000..448faf7c
--- /dev/null
+++ b/archive.rb
@@ -0,0 +1,14 @@
+require 'sidekiq'
+
+# Sidekiq worker that will archive a single URL.
+#
+# Jobs are enqueued by Brain#request_archive.
+class Archive
+  include Sidekiq::Worker
+
+  # uri   - the normalized URL to archive, as a String.
+  # ident - the job identifier produced by JobTracking#job_ident.
+  def perform(uri, ident)
+    # We do nothing for now!
+  end
+end
diff --git a/bot.rb b/bot.rb
new file mode 100644
index 00000000..4a6a43ef
--- /dev/null
+++ b/bot.rb
@@ -0,0 +1,50 @@
+require 'cinch'
+require 'redis-namespace'
+require 'trollop'
+require 'uri'
+
+require File.expand_path('../brain', __FILE__)
+
+# Standard plaintext IRC port, used when --server does not name a port.
+DEFAULT_IRC_PORT = 6667
+
+opts = Trollop.options do
+  opt :server, 'IRC server, expressed as a URI (irc://SERVER:PORT or //SERVER:PORT)', :type => String
+  opt :nick, 'Nick to use', :default => 'ArchiveBot'
+  opt :channels, 'Comma-separated list of channels', :type => String
+  opt :schemes, 'Comma-separated list of acceptable URI schemes', :default => 'http,https'
+  opt :redis, 'Redis server for job tracking', :default => 'redis://localhost:6379'
+  opt :redis_namespace, 'Redis namespace for job tracking', :default => 'archivebot'
+end
+
+%w(server nick channels).each do |opt|
+  Trollop.die "#{opt} is required" unless opts[opt.to_sym]
+end
+
+schemes = opts[:schemes].split(',').map(&:strip)
+channels = opts[:channels].split(',').map(&:strip)
+uri = URI.parse(opts[:server])
+rconn = Redis.new(:url => opts[:redis])
+redis = Redis::Namespace.new(opts[:redis_namespace], :redis => rconn)
+
+# "SERVER:PORT" without a leading // parses with a nil host; fail early with a
+# usage error instead of handing Cinch a nil server.
+Trollop.die 'server must look like irc://SERVER:PORT or //SERVER:PORT' unless uri.host
+
+bot = Cinch::Bot.new do
+  configure do |c|
+    c.server = uri.host
+    c.port = uri.port || DEFAULT_IRC_PORT
+    c.nick = opts[:nick]
+    c.channels = channels
+  end
+
+  brain = Brain.new(redis, schemes)
+
+  # A message of the form "!archive URL" asks the brain to queue that URL.
+  on :message, /\A\!archive (.+)\Z/ do |m, param|
+    brain.request_archive(m, param)
+  end
+end
+
+bot.start
diff --git a/brain.rb b/brain.rb
new file mode 100644
index 00000000..1ac40bb4
--- /dev/null
+++ b/brain.rb
@@ -0,0 +1,60 @@
+require 'uri'
+
+require File.expand_path('../archive', __FILE__)
+require File.expand_path('../job_tracking', __FILE__)
+
+# Decides what to do with an archive request received over IRC: validates the
+# URL, refuses duplicates and queues an Archive job for anything new.
+class Brain
+  include JobTracking
+
+  attr_reader :redis
+  attr_reader :schemes
+
+  # redis   - Redis connection used for job tracking.
+  # schemes - Array of URI scheme Strings that may be archived.
+  def initialize(redis, schemes)
+    @redis = redis
+    @schemes = schemes
+  end
+
+  # Handles an "!archive URL" request.
+  #
+  # m     - the IRC message that made the request; replies are sent through it.
+  # param - the text following "!archive", expected to be a URL.
+  def request_archive(m, param)
+    # Do we have a valid URI?
+    begin
+      uri = URI.parse(param).normalize
+    rescue URI::InvalidURIError
+      reply m, "Sorry, that doesn't look like a URL to me."
+      return
+    end
+
+    # Is the URI in our list of recognized schemes?
+    unless schemes.include?(uri.scheme)
+      reply m, "Sorry, I can only handle #{schemes.join(', ')}."
+      return
+    end
+
+    # Is the job already known?
+    ident = job_ident(uri)
+    if has_job?(ident, redis)
+      reply m, "That URL is already being processed. Use !status #{ident} for updates."
+      return
+    end
+
+    # OK, add the job and queue it up. Sidekiq serializes job arguments to
+    # JSON, so hand the worker a plain String rather than a URI object.
+    add_job(ident, redis)
+    Archive.perform_async(uri.to_s, ident)
+    reply m, "Archiving #{uri}; use !status #{ident} for updates."
+  end
+
+  private
+
+  # Replies through message m, addressing the user who sent it.
+  def reply(m, *args)
+    m.reply "#{m.user.nick}: #{args.join(' ')}"
+  end
+end
diff --git a/job_tracking.rb b/job_tracking.rb
new file mode 100644
index 00000000..5e2934c4
--- /dev/null
+++ b/job_tracking.rb
@@ -0,0 +1,26 @@
+require 'uuidtools'
+
+# Helpers for naming archive jobs and recording them in the Redis set 'jobs'.
+module JobTracking
+  # Namespace UUID under which job identifiers are derived.
+  ARCHIVEBOT_V0_NAMESPACE = UUIDTools::UUID.parse('82244de1-c354-4c89-bf2b-f153ce23af43')
+
+  # Derives a job identifier from a URI: a SHA-1 based UUID of its string
+  # form, rendered in base 36.
+  def job_ident(uri)
+    uuid = UUIDTools::UUID.sha1_create(ARCHIVEBOT_V0_NAMESPACE, uri.to_s)
+
+    # shorten it up a bit
+    uuid.to_i.to_s(36)
+  end
+
+  # Is ident already recorded in the 'jobs' set?
+  def has_job?(ident, redis)
+    redis.sismember('jobs', ident)
+  end
+
+  # Records ident in the 'jobs' set.
+  def add_job(ident, redis)
+    redis.sadd('jobs', ident)
+  end
+end