public
Description: Ruby slugalizer ("Föö Bár!" -> "foo-bar"). Uses ActiveSupport for platform-consistent normalization. Originally by Christoffer Sawicki.
Homepage:
Clone URL: git://github.com/henrik/slugalizer.git
slugalizer / slugalizer.rb
100644 151 lines (119 sloc) 3.904 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env ruby
#
# Slugalizer
# http://github.com/henrik/slugalizer
 
begin
  require "active_support"
rescue LoadError
  require "rubygems"
  gem 'activesupport'
  require "active_support"
end
 
# Turns arbitrary text into a lowercase ASCII slug, e.g. "Föö Bár!" -> "foo-bar".
#
# All methods are available directly on the module (Slugalizer.slugalize)
# thanks to `extend self`.
module Slugalizer
  extend self

  # Word separators a caller may choose from. These characters are also
  # treated as valid slug characters and are never replaced.
  SEPARATORS = %w[- _ +]

  # Builds a slug from +text+.
  #
  # text      - Any object; it is converted with #to_s first (nil becomes "").
  # separator - The string placed between words. Must be one of SEPARATORS.
  #
  # Returns a new, downcased String containing only a-z, 0-9 and the
  # characters listed in SEPARATORS. The input is never modified.
  # Raises RuntimeError if +separator+ is not one of SEPARATORS.
  def slugalize(text, separator = "-")
    unless SEPARATORS.include?(separator)
      raise "Word separator must be one of #{SEPARATORS}"
    end
    re_separator = Regexp.escape(separator)

    # Non-destructive calls throughout: the decomposed string may be frozen
    # or may even be the caller's own object, so it must not be edited in place.
    slug = decompose(text.to_s)
    slug = slug.gsub(/[^\x00-\x7F]+/, '')                           # Remove non-ASCII (e.g. the combining diacritics left by decomposition).
    slug = slug.gsub(/[^a-z0-9\-_\+]+/i, separator)                 # Turn each run of non-slug chars into the separator.
    slug = slug.gsub(/#{re_separator}{2,}/, separator)              # No more than one of the separator in a row.
    slug = slug.gsub(/\A#{re_separator}|#{re_separator}\z/, '')     # Remove leading/trailing separator.
    slug.downcase
  end

private

  # Applies Unicode compatibility decomposition (NFKD) so that accented
  # letters become a base letter followed by combining marks.
  #
  # Uses whichever ActiveSupport normalization API is loaded; when neither is
  # available (ActiveSupport missing, or a release that no longer provides
  # Chars#normalize) it falls back to Ruby's built-in String#unicode_normalize.
  def decompose(text)
    if defined?(ActiveSupport::Multibyte::Handlers) # Active Support <2.2
      ActiveSupport::Multibyte::Handlers::UTF8Handler.normalize(text, :kd).to_s
    elsif defined?(ActiveSupport::Multibyte::Chars) &&
          ActiveSupport::Multibyte::Chars.method_defined?(:normalize) # ActiveSupport 2.2+ that still ships #normalize
      ActiveSupport::Multibyte::Chars.new(text).normalize(:kd).to_s
    else # Plain Ruby
      # Transcode to UTF-8 first: unicode_normalize rejects non-Unicode encodings.
      text.encode(Encoding::UTF_8).unicode_normalize(:nfkd)
    end
  end

end
 
 
if __FILE__ == $PROGRAM_NAME
  require "test/unit"

  # Self-test suite; only defined and run when this file is executed directly.
  class SlugalizerTest < Test::Unit::TestCase
    # Asserts that Slugalizer.slugalize(*args) returns +expected_slug+.
    def assert_slug(expected_slug, *args)
      actual_slug = Slugalizer.slugalize(*args)
      assert_equal(expected_slug, actual_slug)
    end

    # Runs the given block with $KCODE set to +kcode+ and puts the previous
    # value back afterwards, even if the block raises.
    def with_kcode(kcode)
      previous_kcode = $KCODE
      $KCODE = kcode
      yield
    ensure
      $KCODE = previous_kcode
    end

    # Non-string input goes through #to_s.
    def test_converting_to_string
      [[nil, ""], [1, "1"]].each do |input, slug|
        assert_slug(slug, input)
      end
    end

    # A string that is already a valid slug comes back unchanged.
    def test_identity
      already_a_slug = "abc-1_2_3"
      assert_slug(already_a_slug, "abc-1_2_3")
    end

    def test_accented_characters
      assert_slug("acegiklnuo", "āčēģīķļņūö")
    end

    def test_downcasing
      assert_slug("raksmorgas", "RÄKSMÖRGÅS")
    end

    # Punctuation and whitespace at the edges disappear entirely.
    def test_special_characters_outside
      assert_slug("raksmorgas", " räksmörgås!?.")
    end

    # Punctuation between words becomes the separator.
    def test_special_characters_inside
      assert_slug("raka-smorgas-nu", "räka@smörgås.nu")
    end

    # Only the chosen separator is trimmed from the ends; "+" is kept here.
    def test_no_leading_or_trailing_separator
      assert_slug("i-love-c++", "I love C++")
      assert_slug("i-love-c", "I love C--")
    end

    def test_chinese_text
      assert_slug("chinese-text", "chinese 中文測試 text")
    end

    def test_stripped_character_then_whitespace
      assert_slug("abc", "! abc !")
    end

    def test_single_whitescape
      assert_slug("smorgasbord-e-gott", "smörgåsbord é gott")
    end

    def test_surrounding_whitescape
      assert_slug("smorgasbord-e-gott", " smörgåsbord é gott ")
    end

    def test_excessive_whitescape
      assert_slug("smorgasbord-ar-gott", "smörgåsbord \n är \t gott")
    end

    # Runs of the separator, with or without surrounding spaces, collapse to one.
    def test_squeeze_separators
      ["a - b", "a--b"].each do |input|
        assert_slug("a-b", input)
      end
    end

    # Every allowed separator can be passed explicitly.
    def test_separator_parameter
      %w[- _ +].each do |sep|
        assert_slug("smorgasbord#{sep}ar#{sep}gott", "smörgåsbord är gott", sep)
      end
    end

    def test_invalid_separator
      assert_raise(RuntimeError) { Slugalizer.slugalize("smörgåsbord är gott", "@") }
    end

    # With "_" as the separator, an existing "-" is an ordinary slug character.
    def test_handling_of_separator_chars
      assert_slug("abc_-_1_2_3", "abc - 1_2_3", "_")
    end

    def test_other_separators_are_left_alone
      assert_slug("foo-+-b_a_r", "foo + b_a_r")
    end

    def test_with_kcode_set_to_none
      with_kcode('n') { assert_slug("raksmorgas", "räksmörgås 中") }
    end

    def test_with_kcode_set_to_utf_8
      with_kcode('u') { assert_slug("raksmorgas", "räksmörgås 中") }
    end
  end
end