sunny / damn_hotlinkers

Script to run against an HTTP access log to find out which sites hotlink (link directly to) your files

This URL has Read+Write access

damn_hotlinkers / damn_hotlinkers.rb
100644 51 lines (40 sloc) 1.253 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/ruby
# Die, damn hotlinkers!
# by Sunny Ripert - sunfox.org
# Run it against an HTTP access log to find out which sites link directly to your files
 
require 'set'
 
# Collects "leeches": referrers on other sites that link directly to media
# files served by this site, as found in HTTP access logs.
#
# Each log line is split on whitespace and read positionally:
# field 1 is the served domain, field 6 the request path and field 10 the
# quoted referrer.
class DamnHotlinkers
  # Only requests whose URI ends in one of these media extensions are tracked.
  MATCH_URI = /\.(jpg|jpeg|png|gif|mp3|ogg|mov|flv|mpeg|swf|avi)$/

  # Hash mapping each hotlinked URI (String) to the Set of referrers seen for it.
  attr_reader :leeches

  def initialize
    # The default block gives every new key its own empty Set.
    @leeches = Hash.new { |hash, key| hash[key] = Set.new }
  end

  # Add leeches from an Apache-like HTTP logfile.
  # filename accepts whatever resource open() accepts.
  # Returns self so calls can be chained.
  #
  # NOTE(review): Kernel#open gives some names special meaning (on older
  # Rubies a leading "|" runs a command), so only pass trusted names.
  def load(filename)
    # Block form closes the handle once the whole log has been read.
    open(filename) do |log|
      log.each_line do |line|
        fields = line.split

        domain = fields[1]
        uri = "http://#{domain}#{fields[6]}"
        next unless uri =~ MATCH_URI

        ref = fields[10]
        next if ref.nil? # truncated line: no referrer field at all

        # Unquote the referrer, removing only real quote characters.
        ref = ref.sub(/\A"/, '').sub(/"\z/, '')

        # Skip requests without a referrer and those coming from our own
        # domain. The domain is escaped so its dots match literally and a
        # look-alike host is not mistaken for ours.
        next if ref == "-" || ref =~ /:\/\/#{Regexp.escape(domain)}/

        @leeches[uri].add ref
      end
    end
    self
  end

  # Pretty representation of sorted leeches: each URI on its own line,
  # followed by one " -> referrer" line per referrer.
  def to_s
    leeches.sort.map do |uri, referrers|
      [uri] + referrers.sort.map { |ref| " -> #{ref}" }
    end.join("\n")
  end
end
 
# Command-line entry point: runs only when this file is executed directly.
# Every argument is loaded as a log file, then the combined report is printed.
if $0 == __FILE__
  if ARGV.empty?
    abort "Usage: #{$0} file.log [file2.log ...]"
  end

  report = DamnHotlinkers.new
  ARGV.each do |logfile|
    report.load(logfile)
  end
  puts report
end