public
Description: Script to use against an HTTP log to find out who points directly towards your files
Homepage:
Clone URL: git://github.com/sunny/damn_hotlinkers.git
damn_hotlinkers / damn_hotlinkers.rb
100644 83 lines (67 sloc) 2.191 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/ruby
# Die, damn hotlinkers!
# by Sunny Ripert - sunfox.org
# Run it against an HTTP access log to find out who links directly to your files
 
require 'cgi'
require 'date'
require 'set'
 
# Collects "hotlinks" from HTTP access logs: requests for media files whose
# referrer is a page that does not live on the requested domain.
#
# Each log line is split on whitespace and read positionally:
# field 1 is the requested domain, field 3 the timestamp, field 6 the
# request path and field 10 the (quoted) referrer.
class DamnHotlinkers
  # Only requests whose URI ends in one of these extensions are considered.
  MATCH_URI = /\.(jpg|jpeg|png|gif|mp3|ogg|mov|flv|mpeg|swf|avi)$/

  # Referrers containing any of these literal fragments are never reported.
  IGNORE_REF = Regexp.compile %w(
    http://www.google.com/reader
    http://images.search.yahoo.com
    /search?q=cache:
  ).map { |uri| Regexp.escape uri }.join('|')

  # A dd/Mon/yyyy date as it appears inside a log timestamp field.
  LOG_DATE = %r{\d{1,2}/[A-Za-z]{3}/\d{4}}

  # Hash mapping each hotlinked URI to the Set of referrers pointing at it.
  attr_reader :leeches

  # days_ago:: when given, entries dated more than this many days before
  #            today are ignored; nil (the default) keeps every entry.
  def initialize(days_ago = nil)
    @leeches = Hash.new { |hash, uri| hash[uri] = Set.new } # missing keys start as an empty Set
    @min_date = days_ago.nil? ? nil : Date.today - days_ago
  end

  # Add leeches from an Apache-like HTTP logfile.
  # filename accepts whatever resource open() accepts.
  # Returns self so calls can be chained.
  #
  # NOTE(review): Kernel#open is kept on purpose for that compatibility, but it
  # also interprets a leading "|" as a command to run; only pass trusted names.
  def load(filename)
    # Block form so the handle is closed even if a line fails to parse.
    open(filename) do |io|
      io.each_line do |line|
        uri, ref = hotlink_from(line)
        @leeches[uri].add(ref) if uri
      end
    end
    self
  end

  # Pretty representation of sorted leeches: one URI per line, each followed
  # by its referrers prefixed with " -> ". Pass :html to get to_html instead.
  def to_s(rep = nil)
    return to_html if rep == :html

    sorted_leeches.map do |uri, referrers|
      [uri] + referrers.sort.map { |ref| " -> #{ref}" }
    end.join("\n")
  end

  # Same listing as nested HTML lists of links. Every URL is HTML-escaped
  # because referrers come straight from the (client-controlled) log.
  def to_html
    items = sorted_leeches.map do |uri, referrers|
      # join explicitly: interpolating an Array would print its inspect form
      refs = referrers.sort.map { |ref| "<li>#{link_to(ref)}</li>\n" }.join
      "<li>#{link_to(uri)}<ul>#{refs}</ul></li>\n"
    end
    "<ul>#{items.join}</ul>"
  end

  private

  # Returns [uri, referrer] when the log line is a hotlink worth recording,
  # nil otherwise (non-media request, missing/own/ignored referrer, too old).
  def hotlink_from(line)
    fields = line.split
    domain = fields[1]
    uri = "http://#{domain}#{fields[6]}"
    return unless uri =~ MATCH_URI

    ref = fields[10]
    return if ref.nil?

    ref = ref.sub(/\A"/, '').sub(/"\z/, '') # unquote the referrer
    return if ref.empty? || ref == '-'
    # Literal substring test: the domain must not be treated as a pattern.
    return if ref.include?("://#{domain}") || ref =~ IGNORE_REF
    return if @min_date && entry_date(fields[3]) < @min_date

    [uri, ref]
  end

  # Date of a log timestamp field. The dd/Mon/yyyy part is parsed explicitly
  # so the year is never mistaken for part of the time; any other format is
  # handed to Date.parse.
  def entry_date(stamp)
    clf_date = stamp[LOG_DATE]
    clf_date ? Date.strptime(clf_date, '%d/%b/%Y') : Date.parse(stamp)
  end

  # Leeches as [uri, referrers] pairs ordered by URI.
  def sorted_leeches
    leeches.sort_by { |uri, _referrers| uri }
  end

  # HTML anchor pointing at url, with url escaped for both attribute and text.
  def link_to(url)
    safe = CGI.escapeHTML(url)
    "<a href='#{safe}'>#{safe}</a>"
  end
end
 
# Command-line entry point: parse the optional flags, load every log file
# named on the command line and print the combined report.
if __FILE__ == $0
  days_ago = nil
  representation = nil

  # Consume leading options in whatever order they were given; the first
  # argument that is not a recognised option starts the list of log files.
  loop do
    case ARGV.first
    when /^--days=(\d+)$/ then days_ago = Regexp.last_match(1).to_i
    when "--html" then representation = :html
    else break
    end
    ARGV.shift
  end

  abort "Usage: #{$0} [--days=N] [--html] file.log [file2.log ...]" if ARGV.empty?

  damn_them = DamnHotlinkers.new(days_ago)
  ARGV.each { |arg| damn_them.load(arg) }
  puts damn_them.to_s(representation)
end