Permalink
Browse files

Remove spaces from between adjacent words of the same charset...

  • Loading branch information...
ConradIrwin committed Apr 10, 2011
1 parent 1661af3 commit 9c41b9aa606376d0be87bbde0da553968cd17852
Showing with 9 additions and 2 deletions.
  1. +4 −1 lib/rfc2047.rb
  2. +1 −1 rfc2047.gemspec
  3. +4 −0 test/rfc2047_test.rb
View
@@ -16,12 +16,15 @@ module Rfc2047
WORD = /=\?([!#$\%&'*+-\/0-9A-Z\\^\`a-z{|}~]+)\?([BbQq])\?([!->@-~]+)\?=/ # :nodoc:
+ # Look for two adjacent encoded words that declare the same charset (their B/Q encodings may differ).
+ ADJACENT_WORDS = /(#{WORD})[\s\r\n]+(?==\?(\2)\?([BbQq])\?)/
+
# Decodes a string, +from+, containing RFC 2047 encoded words into a target
# character set, +target+ defaulting to utf-8. See iconv_open(3) for information on the
# supported target encodings. If one of the encoded words cannot be
# converted to the target encoding, it is left in its encoded form.
def self.decode(from, target='utf-8')
- from.gsub(WORD) do |word|
+ from.gsub(ADJACENT_WORDS, "\\1").gsub(WORD) do |word|
cs = $1
encoding = $2
text = $3
View
@@ -1,6 +1,6 @@
Gem::Specification.new do |s|
s.name = "rfc2047"
- s.version = "0.1"
+ s.version = "0.2"
s.platform = Gem::Platform::RUBY
s.authors = ["Sam Roberts", "Conrad Irwin"]
s.email = "conrad.irwin@gmail.com"
View
@@ -62,6 +62,10 @@ def test_cases
'iso-8859-1' => 'If you can read this yo u understand the example.',
},
+ "=?windows-1252?Q?Re=3A_Your_tweet=3A_rapportive_=97_=22=40rapportive_still_use_i?=\r\n =?windows-1252?Q?t_every_day=22?=" => {
+ 'utf-8' => %(Re: Your tweet: rapportive \342\200\224 "@rapportive still use it every day")
+ },
+
'=?ISO-8859-1?Q?Olle_J=E4rnefors?= <ojarnef / admin.kth.se>' => {'iso-8859-1' => "Olle J\xE4rnefors <ojarnef / admin.kth.se>",
},

0 comments on commit 9c41b9a

Please sign in to comment.