From 52e5f61afcce536c4e7e6b9a31a54908d23de5ec Mon Sep 17 00:00:00 2001
From: Sam Guymer
Date: Sun, 10 Dec 2017 13:00:36 +1000
Subject: [PATCH] RFC3986 URI path segment encoding

Use an RFC3986 compliant encoder instead of URLEncoder.encode() when
encoding URI path segments.
---
Review note: RFC3986.encode now walks the input by Unicode code point, so
characters outside the BMP are emitted as a single UTF-8 percent-encoded
sequence instead of "%3F%3F". The blob id on the RFC3986.scala "index" line
below predates this amendment.

 .../scala/spinoco/protocol/http/Uri.scala     | 12 +++--
 .../spinoco/protocol/http/codec/RFC3986.scala | 51 +++++++++++++++++++
 .../scala/spinoco/protocol/http/UriSpec.scala |  4 ++
 3 files changed, 64 insertions(+), 3 deletions(-)
 create mode 100644 http/src/main/scala/spinoco/protocol/http/codec/RFC3986.scala

diff --git a/http/src/main/scala/spinoco/protocol/http/Uri.scala b/http/src/main/scala/spinoco/protocol/http/Uri.scala
index 888ba3b..c4a2f2c 100644
--- a/http/src/main/scala/spinoco/protocol/http/Uri.scala
+++ b/http/src/main/scala/spinoco/protocol/http/Uri.scala
@@ -8,7 +8,7 @@ import scodec.bits.BitVector
 import spinoco.protocol.common.util._
 import spinoco.protocol.common.codec._
 import spinoco.protocol.common.Terminator
-import spinoco.protocol.http.Uri.QueryParameter.Multi
+import spinoco.protocol.http.codec.RFC3986
 
 import scala.annotation.tailrec
 
@@ -116,7 +116,7 @@ object Uri {
     def stringify:String = {
       val sb = new StringBuilder()
       if (self.initialSlash) sb.append("/")
-      sb.append(self.segments.map(s => URLEncoder.encode(s, "UTF-8")).mkString("/"))
+      sb.append(self.segments.map(RFC3986.encodePathSegment).mkString("/"))
       if (self.trailingSlash) sb.append("/")
       sb.toString()
     }
@@ -125,6 +125,8 @@ object Uri {
 
   object Path {
 
+    private val PlusRegex = "\\+".r
+
     /** constructs relative path without initial slash (`/`) **/
     def relative(s: String) : Path =
       Path(initialSlash = false, trailingSlash = false, segments = Seq(s))
 
@@ -137,7 +139,11 @@ object Uri {
 
     def fromUtf8String(path: String):Uri.Path = {
       val trimmed = path.trim
-      val segments = trimmed.split("/").filter(_.nonEmpty).map(s => URLDecoder.decode(s, "UTF-8"))
+      val segments = trimmed.split("/").filter(_.nonEmpty).map { s =>
+        // avoid URLDecoder turning a + into a space
+        val segment = PlusRegex.replaceAllIn(s, "%2B")
+        URLDecoder.decode(segment, "UTF-8")
+      }
       Path(
         initialSlash = trimmed.startsWith("/")
         , segments = segments
diff --git a/http/src/main/scala/spinoco/protocol/http/codec/RFC3986.scala b/http/src/main/scala/spinoco/protocol/http/codec/RFC3986.scala
new file mode 100644
index 0000000..805fdc6
--- /dev/null
+++ b/http/src/main/scala/spinoco/protocol/http/codec/RFC3986.scala
@@ -0,0 +1,51 @@
+package spinoco.protocol.http.codec
+
+import java.nio.charset.StandardCharsets
+
+import scala.collection.immutable.BitSet
+
+/**
+  * https://tools.ietf.org/html/rfc3986
+  */
+object RFC3986 {
+
+  val genDelims = BitSet(':', '/', '?', '#', '[', ']', '@')
+
+  val subDelims = BitSet('!', '$', '&', '\'', '(', ')' , '*', '+', ',', ';', '=')
+
+  val reserved = genDelims ++ subDelims
+
+  val alpha = BitSet((('a' to 'z') ++ ('A' to 'Z')).map(_.toInt): _*)
+
+  val digit = BitSet(('0' to '9').map(_.toInt): _*)
+
+  val unreserved = alpha ++ digit ++ BitSet('-', '.', '_', '~')
+
+  val pchar = unreserved ++ subDelims ++ BitSet(':', '@')
+
+
+  // https://tools.ietf.org/html/rfc3986#section-3.1
+  val scheme = alpha ++ digit ++ BitSet('+', '-', '.')
+
+  // https://tools.ietf.org/html/rfc3986#section-3.3
+  val pathSegment = pchar
+
+  /** Percent-encodes (RFC 3986, section 2.5) every code point of `str` not in `allowedChars`. */
+  def encode(str: String, allowedChars: BitSet): String = {
+    // add a buffer to hopefully account for all chars that need to be escaped
+    val sb = new StringBuilder(str.length * 12 / 10)
+    var i = 0
+    while (i < str.length) {
+      // step by code point, not char: a surrogate pair must become one UTF-8 sequence
+      val end = i + Character.charCount(str.codePointAt(i))
+      val s = str.substring(i, end)
+      if (allowedChars.contains(str.codePointAt(i))) sb.append(s)
+      else s.getBytes(StandardCharsets.UTF_8).foreach(b => sb.append("%" + "%02X".format(b)))
+      i = end
+    }
+    sb.mkString
+  }
+
+  def encodePathSegment(segment: String): String = encode(segment, pathSegment)
+
+}
diff --git a/http/src/test/scala/spinoco/protocol/http/UriSpec.scala b/http/src/test/scala/spinoco/protocol/http/UriSpec.scala
index 71075e5..0d10086 100644
--- a/http/src/test/scala/spinoco/protocol/http/UriSpec.scala
+++ b/http/src/test/scala/spinoco/protocol/http/UriSpec.scala
@@ -73,6 +73,10 @@ object UriSpec extends Properties("Uri") {
       , Uri(HttpScheme.HTTP, HostPort("www.spinoco.com", None), Uri.Path.Root, Uri.Query.empty)
       , "http://www.spinoco.com/"
     )
+    , ("http://www.spinoco.com/aA0-._~/!$&'()*+,;=/:@/%5B%5D%2F%7B%7D%C3%A9"
+      , Uri(HttpScheme.HTTP, HostPort("www.spinoco.com", None), Uri.Path.Root / "aA0-._~" / "!$&'()*+,;=" / ":@" / "[]/{}é", Uri.Query.empty)
+      , "http://www.spinoco.com/aA0-._~/!$&'()*+,;=/:@/%5B%5D%2F%7B%7D%C3%A9"
+    )
     , ("http://x.com/123?a=1&b=2;c=3"
       , Uri(HttpScheme.HTTP, HostPort("x.com", None), Uri.Path.Root / "123", Uri.Query("a", "1") :+ (QueryParameter.single("b", "2") :+ ("c", "3")))
       , "http://x.com/123?a=1&b=2;c=3"