public
Description: Some scripts I have written and tend to keep up to date. They reside in my personal bin folder.
Homepage: http://blog.bogojoker.com
Clone URL: git://github.com/JosephPecoraro/scripts.git
commit  10364cfc7dda6b91295160dd15b36a6b4e85fa0a
tree    cc1f003408ad4dbb46f8db14d4a5809941bbf288
parent  56c41b06f9c9d2a80525cf9a79207cf1382ffc05
scripts / bigpicvotes
100755 120 lines (93 sloc) 2.633 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env ruby
# Start Date: Wednesday November 19, 2008
# Current Version: 0.9
# Author: Joseph Pecoraro
# Contact: joepeck02@gmail.com
# Description: Analyze the user comments for top images
# because the commenters often put the numbers for
# the images they really liked.
 
require 'rubygems'
require 'hpricot'
require 'net/http'
require 'uri'
 
 
# Wrapper around the functionality: counts how often each image number is
# mentioned in a page's comments and prints the most mentioned images.
class BigPictureAnalyzer

  # url  - address the page was taken from (only stored here; validating
  #        it is left to the caller)
  # body - HTML source of the page, parsed here with Hpricot
  def initialize(url, body)
    @url = url
    @doc = Hpricot(body)
    @votes = nil
  end


  # Analyze the doc: read the title, work out how many images there are
  # and tally the image numbers found in the comments.
  #
  # n - when greater than 0, the top n images are printed afterwards
  #
  # Raises ArgumentError when the page has no 'h2' element or no
  # 'div.bpBody a' element, since nothing can be ranked without them.
  def analyze(n=0)

    # The title
    heading = (@doc / 'h2')[0]
    raise ArgumentError, "page has no 'h2' title element" unless heading
    @title = heading.innerText

    # Find the number of images: the first run of digits in the text of
    # the last 'div.bpBody a' link (presumably the highest image number
    # on the page -- confirm against the page markup)
    last_link = (@doc / 'div.bpBody a')[-1]
    raise ArgumentError, "page has no 'div.bpBody a' image links" unless last_link
    max = last_link.innerText.match(/\d+/).to_s.to_i

    # Slot 0 is never incremented; it only keeps index == image number
    counts = Array.new(max + 1, 0)

    # Search Comments
    (@doc / '.commentBodyText').each do |com|
      # Links are blanked out first so digits inside URLs are not counted
      com.to_s.gsub(/https?:.*?(\s|[<>])/i, 'zz').scan(/\d+/) do |num|
        num = num.to_i
        counts[num] += 1 if num > 0 && num <= max
      end
    end

    # Rank real images only (1..max) as [count, image] pairs: most votes
    # first, ties broken by the lower image number so the order is stable
    @votes = (1..max).map { |img| [counts[img], img] }
    @votes = @votes.sort_by { |cnt, img| [-cnt, img] }

    # Possibly print the results
    print_top_n(n) if n > 0

  end


  # Print the results. Call analyze first; it fills in the title and votes.
  #
  # n - how many images to list; values <= 0 fall back to 10 and values
  #     above the number of images are reduced to that number
  def print_top_n(n=10)

    # Error handling
    n = 10 if n <= 0
    n = @votes.length if n > @votes.length

    # Print header and top n
    puts
    puts "----------------------------"
    puts @title
    puts "Top Images Based on Comments"
    puts "----------------------------\n\n"
    @votes.first(n).each_with_index do |(cnt, img), i|
      puts " %2d: Image #%-2d - %d votes" % [i + 1, img, cnt]
    end
    puts

  end

end
 
 
 
# When run as a script
if $0 == __FILE__

  # Expect one or two command line args: the url and an optional top count
  unless (1..2) === ARGV.size
    program_name = $0.split(/\//).last
    puts "usage: #{program_name} url [top#]"
    exit 1
  end

  bad_url_message = "Page did not exist. Please correct the URL"

  # Command line args; a missing, non-numeric or non-positive count
  # falls back to 10 (to_i never returns nil, so no `||` default applies)
  url = ARGV[0]
  n = ARGV[1].to_i
  n = 10 if n <= 0

  # Parse the url; anything that is not a well-formed http(s) url is
  # reported the same way as a missing page instead of a stack trace
  begin
    uri = URI.parse(url)
  rescue URI::InvalidURIError
    uri = nil
  end
  unless uri.is_a?(URI::HTTP)
    puts bad_url_message
    exit 2
  end

  # Turn the url into a 'comments_' url if needed
  parts = uri.path.split(/\//)
  last = parts.last
  if last.nil?
    # No page name in the path, so there is nothing to prefix
    puts bad_url_message
    exit 2
  end
  unless last =~ /\Acomments_/
    parts[-1] = 'comments_' + last
    uri.path = parts.join('/')
  end

  # Validate that the URI exists: the request must complete and the
  # server must answer with a success status
  begin
    res = Net::HTTP.get_response(uri)
  rescue StandardError, Timeout::Error
    res = nil
  end
  unless res.is_a?(Net::HTTPSuccess)
    puts bad_url_message
    exit 2
  end

  # Parse and analyze the page that was actually fetched; failures here
  # are about the page content, not about the url
  begin
    BigPictureAnalyzer.new(uri.to_s, res.body).analyze(n)
  rescue => e
    puts "Could not analyze the page: #{e.message}"
    exit 2
  end

end