From cddc8b810b98b5e4730ebfb8e8addbcef231b29d Mon Sep 17 00:00:00 2001 From: John Bytheway Date: Sat, 1 Feb 2020 07:54:13 -0500 Subject: [PATCH 1/4] Add utf8_display_split function This splits a string into a sequence of displayed characters. --- src/catacharset.cpp | 20 ++++++++++++++++++++ src/catacharset.h | 7 ++++++- tests/catacharset_test.cpp | 20 ++++++++++++++++---- 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/catacharset.cpp b/src/catacharset.cpp index 9e680990d6c1d..7ab89e40ed394 100644 --- a/src/catacharset.cpp +++ b/src/catacharset.cpp @@ -479,6 +479,26 @@ std::u32string utf8_to_utf32( const std::string &str ) return ret; } +std::vector utf8_display_split( const std::string &s ) +{ + std::vector result; + std::string current_glyph; + const char *pos = s.c_str(); + int len = s.length(); + while( len > 0 ) { + const char *old_pos = pos; + const uint32_t ch = UTF8_getch( &pos, &len ); + const int width = mk_wcwidth( ch ); + if( width > 0 && !current_glyph.empty() ) { + result.push_back( current_glyph ); + current_glyph.clear(); + } + current_glyph += std::string( old_pos, pos ); + } + result.push_back( current_glyph ); + return result; +} + int center_text_pos( const char *text, int start_pos, int end_pos ) { int full_screen = end_pos - start_pos + 1; diff --git a/src/catacharset.h b/src/catacharset.h index f7722a9d97ea3..d2c2c0aceb6e6 100644 --- a/src/catacharset.h +++ b/src/catacharset.h @@ -5,6 +5,7 @@ #include #include #include +#include #define ANY_LENGTH 5 #define NULL_UNICODE 0x0000 @@ -56,11 +57,15 @@ std::string utf8_to_native( const std::string &str ); std::string utf32_to_utf8( const std::u32string &str ); std::u32string utf8_to_utf32( const std::string &str ); +// Split the given string into displayed characters. Each element of the returned vector +// contains one 'regular' codepoint and all subsequent combining characters. +std::vector utf8_display_split( const std::string & ); + /** * UTF8-Wrapper over std::string. 
* It looks and feels like a std::string, but uses code points counts * as index, not bytes. - * A multi-byte Unicode character might by represented + * A multi-byte Unicode character might be represented * as 3 bytes in UTF8, this class will see these 3 bytes as 1 character. * It will never separate them. It will however split between code points * which might be problematic when containing combination characters. diff --git a/tests/catacharset_test.cpp b/tests/catacharset_test.cpp index cb04e52404e59..b353d30d37688 100644 --- a/tests/catacharset_test.cpp +++ b/tests/catacharset_test.cpp @@ -4,22 +4,34 @@ #include "catch/catch.hpp" #include "catacharset.h" -TEST_CASE( "utf8_width" ) +TEST_CASE( "utf8_width", "[catacharset]" ) { CHECK( utf8_width( "Hello, world!", false ) == 13 ); CHECK( utf8_width( "你好,世界!", false ) == 12 ); CHECK( utf8_width( "Hello, 世界!", false ) == 12 ); CHECK( utf8_width( "激活", true ) == 4 ); CHECK( utf8_width( "激活", false ) == 25 ); + CHECK( utf8_width( "à", false ) == 1 ); + CHECK( utf8_width( "y\u0300", false ) == 1 ); + CHECK( utf8_width( "à̸̠你⃫", false ) == 3 ); } -TEST_CASE( "base64" ) +TEST_CASE( "utf8_display_split", "[catacharset]" ) +{ + CHECK( utf8_display_split( "你好" ) == std::vector { "你", "好" } ); + CHECK( utf8_display_split( "à" ) == std::vector { "à" } ); + CHECK( utf8_display_split( "y\u0300" ) == std::vector { "y\u0300" } ); + CHECK( utf8_display_split( "à̸̠你⃫" ) == std::vector { "à̸̠", "你⃫" } ); + CHECK( utf8_display_split( " " ) == std::vector { " ", " ", " ", " " } ); +} + +TEST_CASE( "base64", "[catacharset]" ) { CHECK( base64_encode( "hello" ) == "#aGVsbG8=" ); CHECK( base64_decode( "#aGVsbG8=" ) == "hello" ); } -TEST_CASE( "utf8_to_wstr" ) +TEST_CASE( "utf8_to_wstr", "[catacharset]" ) { // std::mbstowcs' returning -1 workaround setlocale( LC_ALL, "" ); @@ -28,7 +40,7 @@ TEST_CASE( "utf8_to_wstr" ) CHECK( utf8_to_wstr( src ) == dest ); } -TEST_CASE( "wstr_to_utf8" ) +TEST_CASE( "wstr_to_utf8", "[catacharset]" ) { // 
std::wcstombs' returning -1 workaround setlocale( LC_ALL, "" ); From 8e46186e136ffb3ae0f44bb36cfa48d2f694e76f Mon Sep 17 00:00:00 2001 From: John Bytheway Date: Sun, 2 Feb 2020 10:48:17 -0500 Subject: [PATCH 2/4] Switch mapgen keys from ints to strings Using new facility to split strings into displayed chunks, allow mapgen to use arbitrary Unicode characters (including combining characters) as keys. --- src/mapgen.cpp | 87 ++++++++++++++++++++++++++++---------------------- src/mapgen.h | 35 +++++++++++++++++--- 2 files changed, 80 insertions(+), 42 deletions(-) diff --git a/src/mapgen.cpp b/src/mapgen.cpp index 4e7e55f9f358e..0be95a140118c 100644 --- a/src/mapgen.cpp +++ b/src/mapgen.cpp @@ -686,6 +686,20 @@ void jmapgen_place::offset( const point &offset ) y.valmax -= offset.y; } +map_key::map_key( const std::string &s ) : str( s ) +{ + if( utf8_width( str ) != 1 ) { + debugmsg( "map key '%s' must be 1 column", str ); + } +} + +map_key::map_key( const JsonMember &member ) : str( member.name() ) +{ + if( utf8_width( str ) != 1 ) { + member.throw_error( "format map key must be 1 column" ); + } +} + /** * This is a generic mapgen piece, the template parameter PieceType should be another specific * type of jmapgen_piece. 
This class contains a vector of those objects and will chose one of @@ -1960,16 +1974,13 @@ void mapgen_palette::load_place_mapings( const JsonObject &jo, const std::string { if( jo.has_object( "mapping" ) ) { for( const JsonMember member : jo.get_object( "mapping" ) ) { - const std::string &key = member.name(); - if( key.size() != 1 ) { - member.throw_error( "format map key must be 1 character" ); - } + const map_key key( member ); JsonObject sub = member.get_object(); sub.allow_omitted_members(); if( !sub.has_member( member_name ) ) { continue; } - auto &vect = format_placings[ key[0] ]; + auto &vect = format_placings[ key ]; ::load_place_mapings( sub.get_member( member_name ), vect ); } } @@ -1983,11 +1994,8 @@ void mapgen_palette::load_place_mapings( const JsonObject &jo, const std::string return; } for( const JsonMember member : jo.get_object( member_name ) ) { - const std::string &key = member.name(); - if( key.size() != 1 ) { - member.throw_error( "format map key must be 1 character" ); - } - auto &vect = format_placings[ key[0] ]; + const map_key key( member ); + auto &vect = format_placings[ key ]; ::load_place_mapings( member, vect ); } } @@ -2065,20 +2073,17 @@ mapgen_palette mapgen_palette::load_internal( const JsonObject &jo, const std::s // "terrain": { "a": "t_grass", "b": "t_lava" } if( jo.has_member( "terrain" ) ) { for( const JsonMember member : jo.get_object( "terrain" ) ) { - const std::string &key = member.name(); - if( key.size() != 1 ) { - member.throw_error( "format map key must be 1 character" ); - } + const map_key key( member ); if( member.test_string() ) { - format_terrain[key[0]] = ter_id( member.get_string() ); + format_terrain[key] = ter_id( member.get_string() ); } else { - auto &vect = format_placings[ key[0] ]; + auto &vect = format_placings[ key ]; ::load_place_mapings( member, vect ); if( !vect.empty() ) { // Dummy entry to signal that this terrain is actually defined, because // the code below checks that each square on the map 
has a valid terrain // defined somehow. - format_terrain[key[0]] = t_null; + format_terrain[key] = t_null; } } } @@ -2086,14 +2091,11 @@ mapgen_palette mapgen_palette::load_internal( const JsonObject &jo, const std::s if( jo.has_object( "furniture" ) ) { for( const JsonMember member : jo.get_object( "furniture" ) ) { - const std::string &key = member.name(); - if( key.size() != 1 ) { - member.throw_error( "format map key must be 1 character" ); - } + const map_key key( member ); if( member.test_string() ) { - format_furniture[key[0]] = furn_id( member.get_string() ); + format_furniture[key] = furn_id( member.get_string() ); } else { - auto &vect = format_placings[ key[0] ]; + auto &vect = format_placings[ key ]; ::load_place_mapings( member, vect ); } } @@ -2231,27 +2233,35 @@ bool mapgen_function_json_base::setup_common( const JsonObject &jo ) return false; } - // mandatory: mapgensize rows of mapgensize character lines, each of which must have a matching key in "terrain", - // unless fill_ter is set + // mandatory: mapgensize rows of mapgensize character lines, each of which must have a + // matching key in "terrain", unless fill_ter is set // "rows:" [ "aaaajustlikeinmapgen.cpp", "this.must!be!exactly.24!", "and_must_match_terrain_", .... 
] point expected_dim = mapgensize + m_offset; + assert( expected_dim.x >= 0 ); + assert( expected_dim.y >= 0 ); + parray = jo.get_array( "rows" ); if( static_cast( parray.size() ) < expected_dim.y ) { parray.throw_error( string_format( "format: rows: must have at least %d rows, not %d", expected_dim.y, parray.size() ) ); } for( int c = m_offset.y; c < expected_dim.y; c++ ) { - const auto tmpval = parray.get_string( c ); - if( static_cast( tmpval.size() ) < expected_dim.x ) { - parray.throw_error( string_format( "format: row %d must have at least %d columns, not %d", - c + 1, expected_dim.x, tmpval.size() ) ); + const std::string row = parray.get_string( c ); + std::vector row_keys; + for( const std::string &key : utf8_display_split( row ) ) { + row_keys.emplace_back( key ); + } + if( row_keys.size() < static_cast( expected_dim.x ) ) { + parray.throw_error( + string_format( " format: row %d must have at least %d columns, not %d", + c + 1, expected_dim.x, row_keys.size() ) ); } for( int i = m_offset.x; i < expected_dim.x; i++ ) { const point p = point( i, c ) - m_offset; - const int tmpkey = tmpval[i]; - const auto iter_ter = format_terrain.find( tmpkey ); - const auto iter_furn = format_furniture.find( tmpkey ); - const auto fpi = format_placings.find( tmpkey ); + const map_key key = row_keys[i]; + const auto iter_ter = format_terrain.find( key ); + const auto iter_furn = format_furniture.find( key ); + const auto fpi = format_placings.find( key ); const bool has_terrain = iter_ter != format_terrain.end(); const bool has_furn = iter_furn != format_furniture.end(); @@ -2260,18 +2270,19 @@ bool mapgen_function_json_base::setup_common( const JsonObject &jo ) if( !has_terrain && !fallback_terrain_exists ) { parray.throw_error( string_format( "format: rows: row %d column %d: " - "'%c' is not in 'terrain', and no 'fill_ter' is set!", - c + 1, i + 1, static_cast( tmpkey ) ) ); + "'%s' is not in 'terrain', and no 'fill_ter' is set!", + c + 1, i + 1, key.str ) ); } - if( 
test_mode && !has_terrain && !has_furn && !has_placing && tmpkey != ' ' && tmpkey != '.' ) { + if( test_mode && !has_terrain && !has_furn && !has_placing && + key.str != " " && key.str != "." ) { // TODO: Once all the in-tree mods don't report this error, // it should be changed to happen in regular games (not // just test_mode) and be non-fatal, so that mappers find // out about their issues before they PR their changes. parray.throw_error( string_format( "format: rows: row %d column %d: " - "'%c' has no terrain, furniture, or other definition", - c + 1, i + 1, static_cast( tmpkey ) ) ); + "'%s' has no terrain, furniture, or other definition", + c + 1, i + 1, key.str ) ); } if( has_terrain ) { format[ calc_index( p ) ].ter = iter_ter->second; diff --git a/src/mapgen.h b/src/mapgen.h index 6c9e31034673d..73503b33e7b60 100644 --- a/src/mapgen.h +++ b/src/mapgen.h @@ -181,19 +181,46 @@ class jmapgen_place using palette_id = std::string; +// Strong typedef for strings used as map/palette keys +// Each key should be a UTF-8 string displayed in only one column (i.e. +// utf8_width of 1) but can contain multiple Unicode code points. +class map_key +{ + public: + map_key( const std::string & ); + map_key( const JsonMember & ); + + friend bool operator==( const map_key &l, const map_key &r ) { + return l.str == r.str; + } + + std::string str; +}; + +namespace std +{ +template<> +struct hash { + size_t operator()( const map_key &k ) const noexcept { + return hash {}( k.str ); + } +}; +} // namespace std + class mapgen_palette { public: palette_id id; /** - * The mapping from character code (key) to a list of things that should be placed. This is + * The mapping from character (key) to a list of things that should be placed. This is * similar to objects, but it uses key to get the actual position where to place things * out of the json "bitmap" (which is used to paint the terrain/furniture). 
*/ - using placing_map = std::map< int, std::vector< shared_ptr_fast > >; + using placing_map = + std::unordered_map>>; - std::map format_terrain; - std::map format_furniture; + std::unordered_map format_terrain; + std::unordered_map format_furniture; placing_map format_placings; template From 24b5616070a6a106e2acbb0fc069dac5287fea84 Mon Sep 17 00:00:00 2001 From: John Bytheway Date: Sat, 8 Feb 2020 21:51:29 -0500 Subject: [PATCH 3/4] Test Unicode mapgen with bookcases in house_w MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace bookcase symbol in the house_w_nest_palette with a Unicode symbol that looks similar to a bookcase (▤). --- data/json/mapgen/nested/house_nested.json | 50 +++++++++---------- .../json/mapgen_palettes/house_w_palette.json | 2 +- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/data/json/mapgen/nested/house_nested.json b/data/json/mapgen/nested/house_nested.json index ffd95d71e5cbb..032ef11d11771 100644 --- a/data/json/mapgen/nested/house_nested.json +++ b/data/json/mapgen/nested/house_nested.json @@ -467,7 +467,7 @@ "mapgensize": [ 4, 4 ], "rotation": [ 0, 3 ], "rows": [ - " CR ", + " C▤ ", "O ", " EE ", " EE " @@ -476,7 +476,7 @@ "items": { "O": [ { "item": "SUS_dresser_mens", "chance": 50 }, { "item": "SUS_dresser_womens", "chance": 50, "repeat": [ 1, 2 ] } ], "E": { "item": "bed", "chance": 40, "repeat": [ 1, 2 ] }, - "R": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] } + "▤": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] } } } }, @@ -492,13 +492,13 @@ " EE ", " EE ", "O ", - " CR " + " C▤ " ], "palettes": [ "house_w_nest_palette" ], "items": { "O": [ { "item": "SUS_dresser_mens", "chance": 50 }, { "item": "SUS_dresser_womens", "chance": 50, "repeat": [ 1, 2 ] } ], "E": { "item": "bed", "chance": 40, "repeat": [ 1, 2 ] }, - "R": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] } + "▤": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] } } } }, 
@@ -513,14 +513,14 @@ "rows": [ " ", "C EE", - "R EE", + "▤ EE", " O " ], "palettes": [ "house_w_nest_palette" ], "items": { "O": [ { "item": "SUS_dresser_mens", "chance": 50 }, { "item": "SUS_dresser_womens", "chance": 50, "repeat": [ 1, 2 ] } ], "E": { "item": "bed", "chance": 40, "repeat": [ 1, 2 ] }, - "R": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] } + "▤": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] } } } }, @@ -535,14 +535,14 @@ "rows": [ " ", "EE C", - "EE R", + "EE ▤", " O " ], "palettes": [ "house_w_nest_palette" ], "items": { "O": [ { "item": "SUS_dresser_mens", "chance": 50 }, { "item": "SUS_dresser_womens", "chance": 50, "repeat": [ 1, 2 ] } ], "E": { "item": "bed", "chance": 40, "repeat": [ 1, 2 ] }, - "R": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] } + "▤": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] } } } }, @@ -704,7 +704,7 @@ " ", "EE I", "L AI", - "y R", + "y ▤", "OCy " ], "palettes": [ "house_w_nest_palette" ], @@ -713,7 +713,7 @@ "E": { "item": "bed", "chance": 40, "repeat": [ 1, 2 ] }, "I": { "item": "SUS_desks_bedroom_unisex", "chance": 40, "repeat": [ 1, 2 ] }, "L": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] }, - "R": { "item": "homebooks", "chance": 30, "repeat": [ 1, 2 ] } + "▤": { "item": "homebooks", "chance": 30, "repeat": [ 1, 2 ] } } } }, @@ -729,8 +729,8 @@ " OO ", "IB L", "I EE", - "y R", - "RCa " + "y ▤", + "▤Ca " ], "palettes": [ "house_w_nest_palette" ], "items": { @@ -739,7 +739,7 @@ "I": { "item": "SUS_desks_bedroom_unisex", "chance": 40, "repeat": [ 1, 2 ] }, "L": { "item": "homebooks", "chance": 10, "repeat": [ 1, 2 ] }, "a": { "item": "unisex_coat_rack", "chance": 100, "repeat": [ 1, 2 ] }, - "R": { "item": "homebooks", "chance": 30, "repeat": [ 1, 2 ] } + "▤": { "item": "homebooks", "chance": 30, "repeat": [ 1, 2 ] } } } }, @@ -1023,8 +1023,8 @@ "rotation": [ 0, 3 ], "rows": [ " @@p ", - " R", - " pp R", + " ▤", + " pp ▤", " ", " xxx " ], @@ -1118,11 +1118,11 @@ 
"mapgensize": [ 2, 2 ], "rotation": [ 0, 3 ], "rows": [ - "RR", + "▤▤", "C " ], "palettes": [ "house_w_nest_palette" ], - "items": { "R": [ { "item": "homebooks", "chance": 30 } ] } + "items": { "▤": [ { "item": "homebooks", "chance": 30 } ] } } }, { @@ -1135,10 +1135,10 @@ "rotation": [ 0, 3 ], "rows": [ "C ", - "R " + "▤ " ], "palettes": [ "house_w_nest_palette" ], - "items": { "R": [ { "item": "homebooks", "chance": 30 } ] } + "items": { "▤": [ { "item": "homebooks", "chance": 30 } ] } } }, { @@ -1151,10 +1151,10 @@ "rotation": [ 0, 3 ], "rows": [ " C", - "HR" + "H▤" ], "palettes": [ "house_w_nest_palette" ], - "items": { "R": [ { "item": "homebooks", "chance": 30 } ] } + "items": { "▤": [ { "item": "homebooks", "chance": 30 } ] } } }, { @@ -1230,12 +1230,12 @@ "mapgensize": [ 3, 3 ], "rotation": [ 0, 3 ], "rows": [ - "R H", - "RC ", - "R " + "▤ H", + "▤C ", + "▤ " ], "palettes": [ "house_w_nest_palette" ], - "items": { "R": [ { "item": "homebooks", "chance": 30 } ] } + "items": { "▤": [ { "item": "homebooks", "chance": 30 } ] } } }, { diff --git a/data/json/mapgen_palettes/house_w_palette.json b/data/json/mapgen_palettes/house_w_palette.json index 7d83fa5b9f07c..1cf6d42e0ae3b 100644 --- a/data/json/mapgen_palettes/house_w_palette.json +++ b/data/json/mapgen_palettes/house_w_palette.json @@ -22,7 +22,7 @@ "O": "f_dresser", "P": "f_locker", "Q": "f_rack", - "R": "f_bookcase", + "▤": "f_bookcase", "S": [ [ "f_filing_cabinet", 80 ], [ "f_shredder", 20 ] ], "U": "f_utility_shelf", "V": "f_glass_cabinet", From 47d20e3c756c60caf6dcee72a684303cabd3e28f Mon Sep 17 00:00:00 2001 From: John Bytheway Date: Wed, 19 Feb 2020 04:45:10 -0500 Subject: [PATCH 4/4] Document the Unicode mapgen support --- doc/MAPGEN.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/MAPGEN.md b/doc/MAPGEN.md index 2f5b1fbee289c..c6b62497b6cc7 100644 --- a/doc/MAPGEN.md +++ b/doc/MAPGEN.md @@ -197,9 +197,11 @@ Example: "fill_ter": "t_grass" *required if "fill_ter" is unset* 
> Value: ([array]): blocks of 24 rows of blocks of 24 character lines. Each character is defined by "terrain" and optionally "furniture" or other entries below +Other parts can be linked with this map, for example one can place things like a gaspump (with gasoline) or a toilet (with water) or items from an item group or fields at the square given by a character. + Any character used here must have some definition elsewhere to indicate its purpose. Failing to do so is an error which will be caught by running the tests. The tests will run automatically when you make a pull request for adding new maps to the game. If you have defined `fill_ter` or you are writing nested mapgen, then there are a couple of exceptions. The space and period characters (` ` and `.`) are permitted to have no definition and be used for 'background' in the `rows`. -Other parts can be linked with this map, for example one can place things like a gaspump (with gasoline) or a toilet (with water) or items from an item group or fields at the square given by a character. +As keys, you can use any Unicode character that is displayed in exactly one column (that is, it must not be double-width). This includes, for example, most European alphabets but not Chinese characters. If you intend to take advantage of this, ensure that your editor is saving the file with a UTF-8 encoding. Accents are acceptable, even when using [combining characters](https://en.wikipedia.org/wiki/Combining_character). No normalization is performed; comparison is done at the raw byte (code unit) level, so a precomposed accented letter and the same letter written with a separate combining accent are two different keys. Therefore, there are literally an infinite number of mapgen key characters available. Please don't abuse this by using distinct characters that are visually indistinguishable, or which are so rare as to be unlikely to render correctly for other developers. Example: