Skip to content
This repository
Browse code

Merge pull request #592 from WilliamBZA/master

Reworked TransformAndExtractUrls and fixed tests for it.
  • Loading branch information...
commit db443b1b8ae025006de9d2fb89e51588c34cff64 2 parents f9414fa + c6292ef
Sam Moore authored September 27, 2012
262  JabbR.Tests/TextTransformFacts.cs
@@ -196,6 +196,21 @@ public void UrlWithParenthesesIsTransformed()
196 196
             }
197 197
 
198 198
             [Fact]
  199
+            public void UrlWithSingleTrailingParanthesisMatchesCloseBracketAsText()
  200
+            {
  201
+                // Arrange
  202
+                var message = "(message http://www.jabbr.net/) doesn't match the outside brackets";
  203
+                HashSet<string> extractedUrls;
  204
+
  205
+                // Act
  206
+                var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
  207
+
  208
+                // Assert
  209
+                Assert.Equal("(message <a rel=\"nofollow external\" target=\"_blank\" href=\"http://www.jabbr.net/\" title=\"http://www.jabbr.net/\">http://www.jabbr.net/</a>) doesn't match the outside brackets", result);
  210
+
  211
+            }
  212
+
  213
+            [Fact]
199 214
             public void UrlWithUnicodeIsTransformed()
200 215
             { 
201 216
                 //arrange
@@ -226,14 +241,14 @@ public void UrlWithUnicodeIsTransformed()
226 241
             public void UrlWithCallbacks()
227 242
             {
228 243
                 //arrange
229  
-                var message = @"http://a.co/a.png#""onerror='alert(&quot;Eek!&quot;)'";
  244
+                var message = @"http://a.co/a.png#&quot;onerror=&#39;alert(&quot;Eek!&quot;)'";
230 245
                 HashSet<string> extractedUrls;
231 246
 
232 247
                 //act
233 248
                 var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
234 249
 
235 250
                 //assert
236  
-                Assert.Equal(@"http://a.co/a.png#""onerror='alert(&quot;Eek!&quot;)'", result);
  251
+                Assert.Equal(@"http://a.co/a.png#&quot;onerror=&#39;alert(&quot;Eek!&quot;)'", result);
237 252
             }
238 253
 
239 254
             [Fact]
@@ -251,7 +266,7 @@ public void UrlWithAmpersand()
251 266
             }
252 267
 
253 268
             [Fact]
254  
-            public void UrlWithInvalidButEscapedCharacters()
  269
+            public void UrlWithInvalidButEscapedCharactersMatchesValidUrlSection()
255 270
             {
256 271
                 //arrange
257 272
                 var message = "message http://google.com/&lt;a&gt; continues on";
@@ -261,7 +276,35 @@ public void UrlWithInvalidButEscapedCharacters()
261 276
                 var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
262 277
 
263 278
                 //assert
264  
-                Assert.Equal("message http://google.com/&lt;a&gt; continues on", result);
  279
+                Assert.Equal("message <a rel=\"nofollow external\" target=\"_blank\" href=\"http://google.com/\" title=\"http://google.com/\">http://google.com/</a><a> continues on", result);
  280
+            }
  281
+
  282
+            [Fact]
  283
+            public void UrlWithTrailingQuotationsMatchesUrlButNotTrailingQuotation()
  284
+            {
  285
+                // Arrange
  286
+                var message = "\"Check out www.Jabbr.net/\"";
  287
+                HashSet<string> extractedUrls;
  288
+
  289
+                // Act
  290
+                var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
  291
+
  292
+                // Assert
  293
+                Assert.Equal("\"Check out <a rel=\"nofollow external\" target=\"_blank\" href=\"http://www.Jabbr.net/\" title=\"www.Jabbr.net/\">www.Jabbr.net/</a>\"", result);
  294
+            }
  295
+
  296
+            [Fact]
  297
+            public void EncodedUrlWithTrailingQuotationsMatchesUrlButNotTrailingQuotation()
  298
+            {
  299
+                // Arrange
  300
+                var message = "&quot;Visit http://www.jabbr.net/&quot;";
  301
+                HashSet<string> extractedUrls;
  302
+
  303
+                // Act
  304
+                var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
  305
+
  306
+                // Assert
  307
+                Assert.Equal("\"Visit <a rel=\"nofollow external\" target=\"_blank\" href=\"http://www.jabbr.net/\" title=\"http://www.jabbr.net/\">http://www.jabbr.net/</a>\"", result);
265 308
             }
266 309
 
267 310
             [Fact]
@@ -277,6 +320,217 @@ public void LocalHost()
277 320
                 //assert
278 321
                 Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"http://localhost/foo\" title=\"http://localhost/foo\">http://localhost/foo</a>", result);
279 322
             }
  323
+
  324
+            [Fact]
  325
+            public void UrlsFollowedByACommaDontEncodeTheComma()
  326
+            {
  327
+                // Arrange
  328
+                var message = @"found him, hes https://twitter.com/dreamer3, sent him a tweet";
  329
+                HashSet<string> extractedUrls;
  330
+
  331
+                // Act
  332
+                var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
  333
+
  334
+                // Assert
  335
+                Assert.Equal("found him, hes <a rel=\"nofollow external\" target=\"_blank\" href=\"https://twitter.com/dreamer3\" title=\"https://twitter.com/dreamer3\">https://twitter.com/dreamer3</a>, sent him a tweet", result);
  336
+            }
  337
+
  338
+            [Fact]
  339
+            public void UrlsThatContainCommasAreEncodedEntirely()
  340
+            {
  341
+                // Arrange
  342
+                var message = @"found him, hes https://twitter.com/d,r,e,a,m,e,r,3, sent him a tweet";
  343
+                HashSet<string> extractedUrls;
  344
+
  345
+                // Act
  346
+                var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
  347
+
  348
+                // Assert
  349
+                Assert.Equal("found him, hes <a rel=\"nofollow external\" target=\"_blank\" href=\"https://twitter.com/d,r,e,a,m,e,r,3\" title=\"https://twitter.com/d,r,e,a,m,e,r,3\">https://twitter.com/d,r,e,a,m,e,r,3</a>, sent him a tweet", result);
  350
+            }
  351
+
  352
+            [Fact]
  353
+            public void LeftParenthesis()
  354
+            {
  355
+                //arrange
  356
+                var message = @"(http://foo.com";
  357
+                HashSet<string> extractedUrls;
  358
+
  359
+                //act
  360
+                var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
  361
+
  362
+                //assert
  363
+                Assert.Equal("(<a rel=\"nofollow external\" target=\"_blank\" href=\"http://foo.com\" title=\"http://foo.com\">http://foo.com</a>", result);
  364
+            }
  365
+
  366
+            [Fact]
  367
+            public void RightParenthesis()
  368
+            {
  369
+                //arrange
  370
+                var message = @"http://foo.com)";
  371
+                HashSet<string> extractedUrls;
  372
+
  373
+                //act
  374
+                var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
  375
+
  376
+                //assert
  377
+                Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"http://foo.com\" title=\"http://foo.com\">http://foo.com</a>)", result);
  378
+            }
  379
+
  380
+            [Fact]
  381
+            public void BothParenthesis()
  382
+            {
  383
+                //arrange
  384
+                var message = @"(http://foo.com)";
  385
+                HashSet<string> extractedUrls;
  386
+
  387
+                //act
  388
+                var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
  389
+
  390
+                //assert
  391
+                Assert.Equal("(<a rel=\"nofollow external\" target=\"_blank\" href=\"http://foo.com\" title=\"http://foo.com\">http://foo.com</a>)", result);
  392
+            }
  393
+
  394
+            [Fact]
  395
+            public void MSDN()
  396
+            {
  397
+                //arrange
  398
+                var message = @"http://msdn.microsoft.com/en-us/library/system.linq.enumerable(v=vs.110).aspx";
  399
+                HashSet<string> extractedUrls;
  400
+
  401
+                //act
  402
+                var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
  403
+
  404
+                //assert
  405
+                Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"http://msdn.microsoft.com/en-us/library/system.linq.enumerable(v=vs.110).aspx\" title=\"http://msdn.microsoft.com/en-us/library/system.linq.enumerable(v=vs.110).aspx\">http://msdn.microsoft.com/en-us/library/system.linq.enumerable(v=vs.110).aspx</a>", result);
  406
+            }
  407
+
  408
+            [Fact]
  409
+            public void MoreThanOneSetOfParens()
  410
+            {
  411
+                //arrange
  412
+                var message = @"http://foo.com/more_(than)_one_(parens)";
  413
+                HashSet<string> extractedUrls;
  414
+
  415
+                //act
  416
+                var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
  417
+
  418
+                //assert
  419
+                Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"http://foo.com/more_(than)_one_(parens)\" title=\"http://foo.com/more_(than)_one_(parens)\">http://foo.com/more_(than)_one_(parens)</a>", result);
  420
+            }
  421
+
  422
+            [Fact]
  423
+            public void WikiWithParensAndHash()
  424
+            {
  425
+                //arrange
  426
+                var message = @"http://foo.com/blah_(wikipedia)#cite-1";
  427
+                HashSet<string> extractedUrls;
  428
+
  429
+                //act
  430
+                var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
  431
+
  432
+                //assert
  433
+                Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"http://foo.com/blah_(wikipedia)#cite-1\" title=\"http://foo.com/blah_(wikipedia)#cite-1\">http://foo.com/blah_(wikipedia)#cite-1</a>", result);
  434
+            }
  435
+
  436
+            [Fact]
  437
+            public void WikiWithParensAndMoreAndHash()
  438
+            {
  439
+                //arrange
  440
+                var message = @"http://foo.com/blah_(wikipedia)_blah#cite-1";
  441
+                HashSet<string> extractedUrls;
  442
+
  443
+                //act
  444
+                var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
  445
+
  446
+                //assert
  447
+                Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"http://foo.com/blah_(wikipedia)_blah#cite-1\" title=\"http://foo.com/blah_(wikipedia)_blah#cite-1\">http://foo.com/blah_(wikipedia)_blah#cite-1</a>", result);
  448
+            }
  449
+
  450
+            [Fact]
  451
+            public void BitLyWithoutHttp()
  452
+            {
  453
+                //arrange
  454
+                var message = @"bit.ly/foo";
  455
+                HashSet<string> extractedUrls;
  456
+
  457
+                //act
  458
+                var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
  459
+
  460
+                //assert
  461
+                Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"http://bit.ly/foo\" title=\"bit.ly/foo\">bit.ly/foo</a>", result);
  462
+            }
  463
+
  464
+            [Fact]
  465
+            public void UnicodeInParens()
  466
+            {
  467
+                //arrange
  468
+                var message = @"http://foo.com/unicode_(✪)_in_parens";
  469
+                HashSet<string> extractedUrls;
  470
+
  471
+                //act
  472
+                var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
  473
+
  474
+                //assert
  475
+                Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"http://foo.com/unicode_(&#10026;)_in_parens\" title=\"http://foo.com/unicode_(✪)_in_parens\">http://foo.com/unicode_(✪)_in_parens</a>", result);
  476
+            }
  477
+
  478
+            [Fact]
  479
+            public void SomethingAfterParens()
  480
+            {
  481
+                //arrange
  482
+                var message = @"http://foo.com/(something)?after=parens";
  483
+                HashSet<string> extractedUrls;
  484
+
  485
+                //act
  486
+                var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
  487
+
  488
+                //assert
  489
+                Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"http://foo.com/(something)?after=parens\" title=\"http://foo.com/(something)?after=parens\">http://foo.com/(something)?after=parens</a>", result);
  490
+            }
  491
+
  492
+            [Fact]
  493
+            public void UrlInsideAQuotedSentence()
  494
+            {
  495
+                //arrange
  496
+                var message = "This is a sentence with quotes and a url ... see \"http://foo.com\"";
  497
+                HashSet<string> extractedUrls;
  498
+
  499
+                //act
  500
+                var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
  501
+
  502
+                //assert
  503
+                Assert.Equal("This is a sentence with quotes and a url ... see \"<a rel=\"nofollow external\" target=\"_blank\" href=\"http://foo.com\" title=\"http://foo.com\">http://foo.com</a>\"", result);
  504
+            }
  505
+
  506
+            [Fact]
  507
+            public void UrlEndsWithSlashInsideAQuotedSentence()
  508
+            {
  509
+                //arrange
  510
+                var message = "\"Visit http://www.jabbr.net/\"";
  511
+                HashSet<string> extractedUrls;
  512
+
  513
+                //act
  514
+                var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
  515
+
  516
+                //assert
  517
+                Assert.Equal("\"Visit <a rel=\"nofollow external\" target=\"_blank\" href=\"http://www.jabbr.net/\" title=\"http://www.jabbr.net/\">http://www.jabbr.net/</a>\"", result);
  518
+            }
  519
+
  520
+            [Fact]
  521
+            public void GoogleUrlWithQueryStringParams()
  522
+            {
  523
+                //arrange
  524
+                var message = "https://www.google.com/search?q=test+search&amp;sugexp=chrome,mod=14&amp;sourceid=chrome&amp;ie=UTF-8";
  525
+                HashSet<string> extractedUrls;
  526
+
  527
+                //act
  528
+                var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
  529
+
  530
+                //assert
  531
+                Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"https://www.google.com/search?q=test+search&amp;sugexp=chrome,mod=14&amp;sourceid=chrome&amp;ie=UTF-8\" title=\"https://www.google.com/search?q=test+search&amp;sugexp=chrome,mod=14&amp;sourceid=chrome&amp;ie=UTF-8\">https://www.google.com/search?q=test+search&amp;sugexp=chrome,mod=14&amp;sourceid=chrome&amp;ie=UTF-8</a>", result);
  532
+                //Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"https://www.google.com/search?q=test+search&sugexp=chrome,mod=14&sourceid=chrome&ie=UTF-8\" title=\"https://www.google.com/search?q=test+search&amp;sugexp=chrome,mod=14&amp;sourceid=chrome&amp;ie=UTF-8\">https://www.google.com/search?q=test+search&amp;sugexp=chrome,mod=14&amp;sourceid=chrome&amp;ie=UTF-8</a>", result);
  533
+            }
280 534
         }
281 535
     }
282 536
 }
7  JabbR/Infrastructure/TextTransform.cs
@@ -43,10 +43,11 @@ private string ConvertTextWithNewLines(string message)
43 43
             return message;
44 44
         }
45 45
 
46  
-        static Regex urlPattern = new Regex(@"(?:(?:https?|ftp)://|www\.)[^\s]+", RegexOptions.Compiled | RegexOptions.IgnoreCase);
  46
+        static Regex urlPattern = new Regex(@"(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'"".,<>?«»“”‘’]))", RegexOptions.Compiled | RegexOptions.IgnoreCase);
47 47
 
48 48
         public static string TransformAndExtractUrls(string message, out HashSet<string> extractedUrls)
49 49
         {
  50
+            message = HttpUtility.HtmlDecode(message);
50 51
             var urls = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
51 52
             message = urlPattern.Replace(message, m =>
52 53
             {
@@ -58,7 +59,7 @@ public static string TransformAndExtractUrls(string message, out HashSet<string>
58 59
 
59 60
                 if (!Uri.IsWellFormedUriString(url, UriKind.Absolute))
60 61
                 {
61  
-                    return m.Value;
  62
+                    return HttpUtility.HtmlEncode(m.Value);
62 63
                 }
63 64
 
64 65
                 urls.Add(url);
@@ -66,7 +67,7 @@ public static string TransformAndExtractUrls(string message, out HashSet<string>
66 67
                 return String.Format(CultureInfo.InvariantCulture,
67 68
                                      "<a rel=\"nofollow external\" target=\"_blank\" href=\"{0}\" title=\"{1}\">{1}</a>",
68 69
                                      Encoder.HtmlAttributeEncode(url),
69  
-                                     m.Value);
  70
+                                     HttpUtility.HtmlEncode(m.Value));
70 71
             });
71 72
 
72 73
             extractedUrls = urls;

0 notes on commit db443b1

Please sign in to comment.
Something went wrong with that request. Please try again.