Skip to content

Commit

Permalink
Better handling of unicode supplementary chars
Browse files Browse the repository at this point in the history
Supplementary characters require 2 java/kotlin
chars to store but are rendered as a single
character. This caused problems for a lot of
places that assumed length = number of chars.

Replaced the buffer in TextSurface to be an array
of codepoints (int) rather than a plain String (which
would have an undefined length if surrogate pairs are
used). Iterate through the codepoints rather than chars.

Note this is more consistent but not necessarily
always better than to measuring these as 2 chars.
Text measurement is complicated.
  • Loading branch information
SonOfBowser authored and JakeWharton committed Feb 7, 2020
1 parent 0b2efcd commit 2ef687d
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 22 deletions.
16 changes: 11 additions & 5 deletions picnic/src/main/java/com/jakewharton/picnic/textLayout.kt
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,20 @@ interface TextLayout {
fun draw(canvas: TextCanvas)
}

/**
* Returns number of Unicode code points in this string. This may be different from [String.length]
* if the string contains surrogate pairs.
*/
private val String.unicodeLength: Int get() = codePointCount(0, length)

internal class SimpleLayout(private val cell: PositionedCell) : TextLayout {
private val leftPadding = cell.canonicalStyle?.paddingLeft ?: 0
private val topPadding = cell.canonicalStyle?.paddingTop ?: 0

override fun measureWidth(): Int {
return leftPadding +
(cell.canonicalStyle?.paddingRight ?: 0) +
(cell.cell.content.split('\n').maxBy { it.length }?.length ?: 0)
(cell.cell.content.split('\n').maxBy { it.unicodeLength }?.unicodeLength ?: 0)
}

override fun measureHeight(): Int {
Expand Down Expand Up @@ -53,10 +59,10 @@ internal class SimpleLayout(private val cell: PositionedCell) : TextLayout {

var x = left
var y = top
for (char in cell.cell.content) {
// TODO invisible chars, codepoints, graphemes, etc.
if (char != '\n') {
canvas[y, x++] = char
for (codePoint in cell.cell.content.codePoints()) {
// TODO invisible chars, graphemes, etc.
if (codePoint != '\n'.toInt()) {
canvas[y, x++] = codePoint
} else {
y++
x = left
Expand Down
39 changes: 22 additions & 17 deletions picnic/src/main/java/com/jakewharton/picnic/textSurface.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,36 +4,41 @@ internal class TextSurface(
override val width: Int,
override val height: Int
) : TextCanvas {
private val buffer = StringBuilder(height * (width + 1)).apply {
repeat(height) {
repeat(width) {
append(' ')
}
append('\n')
}
}
private val buffer = IntArray(height * width) { ' '.toInt() }

override operator fun set(row: Int, column: Int, char: Char) {
override operator fun set(row: Int, column: Int, codePoint: Int) {
require(row in 0 until height) { "Row $row not in range [0, $height)" }
require(column in 0 until width) { "Column $column not in range [0, $width)" }
buffer[row * (width + 1) + column] = char
buffer[row * width + column] = codePoint
}

override fun get(row: Int, column: Int): Char {
override fun get(row: Int, column: Int): Int {
require(row in 0 until height) { "Row $row not in range [0, $height)" }
require(column in 0 until width) { "Column $column not in range [0, $width)" }
return buffer[row * (width + 1) + column]
return buffer[row * width + column]
}

override fun toString() = buffer.toString()
override fun toString(): String {
val capacity = buffer.size +
/* newlines: */ height +
/* multi-char codepoint estimate: */ width
return buildString(capacity) {
buffer.forEachIndexed { index, codePoint ->
appendCodePoint(codePoint)

if (index % width == width - 1) appendln()
}
}
}
}

interface TextCanvas {
val width: Int
val height: Int

operator fun set(row: Int, column: Int, char: Char)
operator fun get(row: Int, column: Int): Char
operator fun set(row: Int, column: Int, char: Char) = set(row, column, char.toInt())
operator fun set(row: Int, column: Int, char: Int)
operator fun get(row: Int, column: Int): Int

@JvmDefault
fun clip(left: Int, right: Int, top: Int, bottom: Int): TextCanvas {
Expand All @@ -51,13 +56,13 @@ private class ClippedTextCanvas(
override val width = right - left
override val height = bottom - top

override fun set(row: Int, column: Int, char: Char) {
override fun set(row: Int, column: Int, char: Int) {
require(row in 0 until height) { "Row $row not in range [0, $height)" }
require(column in 0 until width) { "Column $column not in range [0, $width)" }
canvas[top + row, left + column] = char
}

override fun get(row: Int, column: Int): Char {
override fun get(row: Int, column: Int): Int {
require(row in 0 until height) { "Row $row not in range [0, $height)" }
require(column in 0 until width) { "Column $column not in range [0, $width)" }
return canvas[top + row, left + column]
Expand Down
40 changes: 40 additions & 0 deletions picnic/src/test/java/com/jakewharton/picnic/CellSizeTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -120,4 +120,44 @@ class CellSizeTest {
| 1
|""".trimMargin())
}

@Test fun unicode() {
val table = table {
// 1 UTF-8 bytes.
row('\u0031', 'a')
// 2 UTF-8 bytes.
row('\u00A3', 'a')
// 3 UTF-8 bytes.
row('\u20AC', 'a')
// 3 UTF-8 bytes, full-width.
row('\u5317', 'a')
// 4 UTF-8 bytes (2 * UTF-16), full-width.
row(String(Character.toChars(0x1F603)), 'a')
}

assertThat(table.renderText()).isEqualTo("""
|1a
|£a
|€a
|北a
|😃a
|""".trimMargin())
}

@Test fun mixedWidth() {
// Rows contain mixture of BMP and supplementary codepoints.
val table = table {
row("a", "a")
// 2 UTF-8 bytes.
row("😃.😃.😃", 'a')
// 2 UTF-8 bytes.
row(".😃.😃.", 'a')
}

assertThat(table.renderText()).isEqualTo("""
|a a
|😃.😃.😃a
|.😃.😃.a
|""".trimMargin())
}
}

0 comments on commit 2ef687d

Please sign in to comment.