Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge pull request #592 from WilliamBZA/master

Reworked TransformAndExtractUrls and fixed tests for it.
  • Loading branch information...
commit db443b1b8ae025006de9d2fb89e51588c34cff64 2 parents f9414fa + c6292ef
@samandmoore samandmoore authored
View
262 JabbR.Tests/TextTransformFacts.cs
@@ -196,6 +196,21 @@ public void UrlWithParenthesesIsTransformed()
}
[Fact]
+ public void UrlWithSingleTrailingParanthesisMatchesCloseBracketAsText()
+ {
+ // Arrange: a URL ending in a slash sits inside a parenthesised phrase, so the closing parenthesis right after it belongs to the sentence.
+ var message = "(message http://www.jabbr.net/) doesn't match the outside brackets";
+ HashSet<string> extractedUrls;
+
+ // Act: linkify the message; the extracted URL set is not inspected by this test.
+ var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
+
+ // Assert: only http://www.jabbr.net/ is wrapped in the anchor; both parentheses stay outside it as plain text.
+ Assert.Equal("(message <a rel=\"nofollow external\" target=\"_blank\" href=\"http://www.jabbr.net/\" title=\"http://www.jabbr.net/\">http://www.jabbr.net/</a>) doesn't match the outside brackets", result);
+
+ }
+
+ [Fact]
public void UrlWithUnicodeIsTransformed()
{
//arrange
@@ -226,14 +241,14 @@ public void UrlWithUnicodeIsTransformed()
public void UrlWithCallbacks()
{
//arrange
- var message = @"http://a.co/a.png#""onerror='alert(&quot;Eek!&quot;)'";
+ var message = @"http://a.co/a.png#&quot;onerror=&#39;alert(&quot;Eek!&quot;)'";
HashSet<string> extractedUrls;
//act
var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
//assert
- Assert.Equal(@"http://a.co/a.png#""onerror='alert(&quot;Eek!&quot;)'", result);
+ Assert.Equal(@"http://a.co/a.png#&quot;onerror=&#39;alert(&quot;Eek!&quot;)'", result);
}
[Fact]
@@ -251,7 +266,7 @@ public void UrlWithAmpersand()
}
[Fact]
- public void UrlWithInvalidButEscapedCharacters()
+ public void UrlWithInvalidButEscapedCharactersMatchesValidUrlSection()
{
//arrange
var message = "message http://google.com/&lt;a&gt; continues on";
@@ -261,7 +276,35 @@ public void UrlWithInvalidButEscapedCharacters()
var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
//assert
- Assert.Equal("message http://google.com/&lt;a&gt; continues on", result);
+ Assert.Equal("message <a rel=\"nofollow external\" target=\"_blank\" href=\"http://google.com/\" title=\"http://google.com/\">http://google.com/</a><a> continues on", result);
+ }
+
+ [Fact]
+ public void UrlWithTrailingQuotationsMatchesUrlButNotTrailingQuotation()
+ {
+ // Arrange: a scheme-less www address ending in a slash is the last thing inside a double-quoted sentence.
+ var message = "\"Check out www.Jabbr.net/\"";
+ HashSet<string> extractedUrls;
+
+ // Act: linkify the message; the extracted URL set is not inspected by this test.
+ var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
+
+ // Assert: the closing quote stays outside the anchor; href gains an http:// prefix while title and link text keep the address as typed.
+ Assert.Equal("\"Check out <a rel=\"nofollow external\" target=\"_blank\" href=\"http://www.Jabbr.net/\" title=\"www.Jabbr.net/\">www.Jabbr.net/</a>\"", result);
+ }
+
+ [Fact]
+ public void EncodedUrlWithTrailingQuotationsMatchesUrlButNotTrailingQuotation()
+ {
+ // Arrange: the sentence is wrapped in HTML-encoded quotes (&quot; entities) and the URL ends right before the closing entity.
+ var message = "&quot;Visit http://www.jabbr.net/&quot;";
+ HashSet<string> extractedUrls;
+
+ // Act: linkify the message; the extracted URL set is not inspected by this test.
+ var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
+
+ // Assert: the link covers only the URL and the &quot; entities come back as literal quote characters. NOTE(review): the non-URL text is returned decoded here - confirm callers re-encode it before rendering.
+ Assert.Equal("\"Visit <a rel=\"nofollow external\" target=\"_blank\" href=\"http://www.jabbr.net/\" title=\"http://www.jabbr.net/\">http://www.jabbr.net/</a>\"", result);
}
[Fact]
@@ -277,6 +320,217 @@ public void LocalHost()
//assert
Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"http://localhost/foo\" title=\"http://localhost/foo\">http://localhost/foo</a>", result);
}
+
+ [Fact]
+ public void UrlsFollowedByACommaDontEncodeTheComma()
+ {
+ // Arrange: a Twitter URL is immediately followed by a comma that belongs to the sentence.
+ var message = @"found him, hes https://twitter.com/dreamer3, sent him a tweet";
+ HashSet<string> extractedUrls;
+
+ // Act: linkify the message; the extracted URL set is not inspected by this test.
+ var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
+
+ // Assert: the anchor ends at dreamer3 and the trailing comma stays outside it as plain text.
+ Assert.Equal("found him, hes <a rel=\"nofollow external\" target=\"_blank\" href=\"https://twitter.com/dreamer3\" title=\"https://twitter.com/dreamer3\">https://twitter.com/dreamer3</a>, sent him a tweet", result);
+ }
+
+ [Fact]
+ public void UrlsThatContainCommasAreEncodedEntirely()
+ {
+ // Arrange: the URL path itself contains commas and is then followed by one more comma from the sentence.
+ var message = @"found him, hes https://twitter.com/d,r,e,a,m,e,r,3, sent him a tweet";
+ HashSet<string> extractedUrls;
+
+ // Act: linkify the message; the extracted URL set is not inspected by this test.
+ var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
+
+ // Assert: commas inside the path are kept in href, title and link text; only the final trailing comma is left outside the anchor.
+ Assert.Equal("found him, hes <a rel=\"nofollow external\" target=\"_blank\" href=\"https://twitter.com/d,r,e,a,m,e,r,3\" title=\"https://twitter.com/d,r,e,a,m,e,r,3\">https://twitter.com/d,r,e,a,m,e,r,3</a>, sent him a tweet", result);
+ }
+
+ [Fact]
+ public void LeftParenthesis()
+ {
+ //arrange: a URL preceded by an opening parenthesis that is never closed
+ var message = @"(http://foo.com";
+ HashSet<string> extractedUrls;
+
+ //act: linkify the message; the extracted URL set is not inspected by this test
+ var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
+
+ //assert: the opening parenthesis stays in front of the anchor and is not part of the link
+ Assert.Equal("(<a rel=\"nofollow external\" target=\"_blank\" href=\"http://foo.com\" title=\"http://foo.com\">http://foo.com</a>", result);
+ }
+
+ [Fact]
+ public void RightParenthesis()
+ {
+ //arrange: a URL followed by a closing parenthesis that has no matching opener
+ var message = @"http://foo.com)";
+ HashSet<string> extractedUrls;
+
+ //act: linkify the message; the extracted URL set is not inspected by this test
+ var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
+
+ //assert: the closing parenthesis stays after the anchor and is not part of the link
+ Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"http://foo.com\" title=\"http://foo.com\">http://foo.com</a>)", result);
+ }
+
+ [Fact]
+ public void BothParenthesis()
+ {
+ //arrange: a URL wrapped in a pair of parentheses
+ var message = @"(http://foo.com)";
+ HashSet<string> extractedUrls;
+
+ //act: linkify the message; the extracted URL set is not inspected by this test
+ var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
+
+ //assert: both wrapping parentheses stay outside the anchor as plain text
+ Assert.Equal("(<a rel=\"nofollow external\" target=\"_blank\" href=\"http://foo.com\" title=\"http://foo.com\">http://foo.com</a>)", result);
+ }
+
+ [Fact]
+ public void MSDN()
+ {
+ //arrange: an MSDN-style URL with a balanced parenthesised segment in the middle of the path, followed by .aspx
+ var message = @"http://msdn.microsoft.com/en-us/library/system.linq.enumerable(v=vs.110).aspx";
+ HashSet<string> extractedUrls;
+
+ //act: linkify the message; the extracted URL set is not inspected by this test
+ var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
+
+ //assert: the whole URL, parentheses included, appears in href, title and link text
+ Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"http://msdn.microsoft.com/en-us/library/system.linq.enumerable(v=vs.110).aspx\" title=\"http://msdn.microsoft.com/en-us/library/system.linq.enumerable(v=vs.110).aspx\">http://msdn.microsoft.com/en-us/library/system.linq.enumerable(v=vs.110).aspx</a>", result);
+ }
+
+ [Fact]
+ public void MoreThanOneSetOfParens()
+ {
+ //arrange: a URL whose path has two balanced parenthesised groups, the second of which ends the URL
+ var message = @"http://foo.com/more_(than)_one_(parens)";
+ HashSet<string> extractedUrls;
+
+ //act: linkify the message; the extracted URL set is not inspected by this test
+ var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
+
+ //assert: both groups, including the final closing parenthesis, are kept inside the link
+ Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"http://foo.com/more_(than)_one_(parens)\" title=\"http://foo.com/more_(than)_one_(parens)\">http://foo.com/more_(than)_one_(parens)</a>", result);
+ }
+
+ [Fact]
+ public void WikiWithParensAndHash()
+ {
+ //arrange: a wiki-style URL where a balanced parenthesised segment is followed directly by a # fragment
+ var message = @"http://foo.com/blah_(wikipedia)#cite-1";
+ HashSet<string> extractedUrls;
+
+ //act: linkify the message; the extracted URL set is not inspected by this test
+ var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
+
+ //assert: the parenthesised segment and the fragment are both part of href, title and link text
+ Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"http://foo.com/blah_(wikipedia)#cite-1\" title=\"http://foo.com/blah_(wikipedia)#cite-1\">http://foo.com/blah_(wikipedia)#cite-1</a>", result);
+ }
+
+ [Fact]
+ public void WikiWithParensAndMoreAndHash()
+ {
+ //arrange: a wiki-style URL with a balanced parenthesised segment, then more path text, then a # fragment
+ var message = @"http://foo.com/blah_(wikipedia)_blah#cite-1";
+ HashSet<string> extractedUrls;
+
+ //act: linkify the message; the extracted URL set is not inspected by this test
+ var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
+
+ //assert: the full URL, including the text after the parentheses and the fragment, is linked
+ Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"http://foo.com/blah_(wikipedia)_blah#cite-1\" title=\"http://foo.com/blah_(wikipedia)_blah#cite-1\">http://foo.com/blah_(wikipedia)_blah#cite-1</a>", result);
+ }
+
+ [Fact]
+ public void BitLyWithoutHttp()
+ {
+ //arrange: a bare host/path address with neither a scheme nor a www prefix
+ var message = @"bit.ly/foo";
+ HashSet<string> extractedUrls;
+
+ //act: linkify the message; the extracted URL set is not inspected by this test
+ var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
+
+ //assert: href gains an http:// prefix while title and link text keep the address as typed
+ Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"http://bit.ly/foo\" title=\"bit.ly/foo\">bit.ly/foo</a>", result);
+ }
+
+ [Fact]
+ public void UnicodeInParens()
+ {
+ //arrange: a URL whose parenthesised path segment holds a non-ASCII character
+ var message = @"http://foo.com/unicode_(✪)_in_parens";
+ HashSet<string> extractedUrls;
+
+ //act: linkify the message; the extracted URL set is not inspected by this test
+ var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
+
+ //assert: href carries the character as the numeric entity &#10026;, while title and link text keep it literal
+ Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"http://foo.com/unicode_(&#10026;)_in_parens\" title=\"http://foo.com/unicode_(✪)_in_parens\">http://foo.com/unicode_(✪)_in_parens</a>", result);
+ }
+
+ [Fact]
+ public void SomethingAfterParens()
+ {
+ //arrange: a URL whose path starts with a parenthesised segment that is followed by a query string
+ var message = @"http://foo.com/(something)?after=parens";
+ HashSet<string> extractedUrls;
+
+ //act: linkify the message; the extracted URL set is not inspected by this test
+ var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
+
+ //assert: the parenthesised segment and the query string are both part of href, title and link text
+ Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"http://foo.com/(something)?after=parens\" title=\"http://foo.com/(something)?after=parens\">http://foo.com/(something)?after=parens</a>", result);
+ }
+
+ [Fact]
+ public void UrlInsideAQuotedSentence()
+ {
+ //arrange: a path-less URL wrapped in double quotes at the end of a sentence
+ var message = "This is a sentence with quotes and a url ... see \"http://foo.com\"";
+ HashSet<string> extractedUrls;
+
+ //act: linkify the message; the extracted URL set is not inspected by this test
+ var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
+
+ //assert: both surrounding quotes stay outside the anchor and the rest of the sentence is unchanged
+ Assert.Equal("This is a sentence with quotes and a url ... see \"<a rel=\"nofollow external\" target=\"_blank\" href=\"http://foo.com\" title=\"http://foo.com\">http://foo.com</a>\"", result);
+ }
+
+ [Fact]
+ public void UrlEndsWithSlashInsideAQuotedSentence()
+ {
+ //arrange: a double-quoted sentence whose URL ends in a slash right before the closing quote
+ var message = "\"Visit http://www.jabbr.net/\"";
+ HashSet<string> extractedUrls;
+
+ //act: linkify the message; the extracted URL set is not inspected by this test
+ var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
+
+ //assert: the link ends at the trailing slash and the closing quote stays outside the anchor
+ Assert.Equal("\"Visit <a rel=\"nofollow external\" target=\"_blank\" href=\"http://www.jabbr.net/\" title=\"http://www.jabbr.net/\">http://www.jabbr.net/</a>\"", result);
+ }
+
+ [Fact]
+ public void GoogleUrlWithQueryStringParams()
+ {
+ //arrange: a Google search URL whose query-string separators arrive as &amp; entities and whose values include a comma
+ var message = "https://www.google.com/search?q=test+search&amp;sugexp=chrome,mod=14&amp;sourceid=chrome&amp;ie=UTF-8";
+ HashSet<string> extractedUrls;
+
+ //act: linkify the message; the extracted URL set is not inspected by this test
+ var result = TextTransform.TransformAndExtractUrls(message, out extractedUrls);
+
+ //assert: the whole URL is linked and the separators appear as &amp; in href, title and link text
+ Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"https://www.google.com/search?q=test+search&amp;sugexp=chrome,mod=14&amp;sourceid=chrome&amp;ie=UTF-8\" title=\"https://www.google.com/search?q=test+search&amp;sugexp=chrome,mod=14&amp;sourceid=chrome&amp;ie=UTF-8\">https://www.google.com/search?q=test+search&amp;sugexp=chrome,mod=14&amp;sourceid=chrome&amp;ie=UTF-8</a>", result);
+ //Assert.Equal("<a rel=\"nofollow external\" target=\"_blank\" href=\"https://www.google.com/search?q=test+search&sugexp=chrome,mod=14&sourceid=chrome&ie=UTF-8\" title=\"https://www.google.com/search?q=test+search&amp;sugexp=chrome,mod=14&amp;sourceid=chrome&amp;ie=UTF-8\">https://www.google.com/search?q=test+search&amp;sugexp=chrome,mod=14&amp;sourceid=chrome&amp;ie=UTF-8</a>", result);
+ }
}
}
}
View
7 JabbR/Infrastructure/TextTransform.cs
@@ -43,10 +43,11 @@ private string ConvertTextWithNewLines(string message)
return message;
}
- static Regex urlPattern = new Regex(@"(?:(?:https?|ftp)://|www\.)[^\s]+", RegexOptions.Compiled | RegexOptions.IgnoreCase);
+ static Regex urlPattern = new Regex(@"(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'"".,<>?«»“”‘’]))", RegexOptions.Compiled | RegexOptions.IgnoreCase);
public static string TransformAndExtractUrls(string message, out HashSet<string> extractedUrls)
{
+ message = HttpUtility.HtmlDecode(message);
var urls = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
message = urlPattern.Replace(message, m =>
{
@@ -58,7 +59,7 @@ public static string TransformAndExtractUrls(string message, out HashSet<string>
if (!Uri.IsWellFormedUriString(url, UriKind.Absolute))
{
- return m.Value;
+ return HttpUtility.HtmlEncode(m.Value);
}
urls.Add(url);
@@ -66,7 +67,7 @@ public static string TransformAndExtractUrls(string message, out HashSet<string>
return String.Format(CultureInfo.InvariantCulture,
"<a rel=\"nofollow external\" target=\"_blank\" href=\"{0}\" title=\"{1}\">{1}</a>",
Encoder.HtmlAttributeEncode(url),
- m.Value);
+ HttpUtility.HtmlEncode(m.Value));
});
extractedUrls = urls;
Please sign in to comment.
Something went wrong with that request. Please try again.