diff --git a/stdlib/Unicode/src/Unicode.jl b/stdlib/Unicode/src/Unicode.jl
index 7ac3a9f9b1d4b..b1f93ac116be1 100644
--- a/stdlib/Unicode/src/Unicode.jl
+++ b/stdlib/Unicode/src/Unicode.jl
@@ -2,7 +2,7 @@
 
 module Unicode
 
-export graphemes
+export graphemes, isemoji
 
 """
     Unicode.normalize(s::AbstractString; keywords...)
@@ -62,7 +62,6 @@ julia> Unicode.normalize("JúLiA", stripmark=true)
 function normalize end
 normalize(s::AbstractString, nf::Symbol) = Base.Unicode.normalize(s, nf)
 normalize(s::AbstractString; kwargs...) = Base.Unicode.normalize(s; kwargs...)
-
 """
     Unicode.isassigned(c) -> Bool
 
@@ -89,4 +88,96 @@ letter combined with an accent mark is a single grapheme.)
 """
 graphemes(s::AbstractString) = Base.Unicode.GraphemeIterator{typeof(s)}(s)
+
+
+include("emoji_ranges.jl")
+const SKIN_COLORS = 0x1F3FB:0x1F3FF
+const REGIONAL_INDICATORS = 0x1F1E6:0x1F1FF
+const ZWJ = '\u200d' # Zero-width joiner
+
+"""
+    isemoji(x::Union{AbstractChar, AbstractString}) -> Bool
+
+Test whether a character or string is a single emoji.
+
+Combined emoji sequences separated by the zero-width joiner character `'\\u200d'`,
+such as 👨\u200d❤\uFE0F\u200d👨 `['👨', '\\u200d', '❤', '\\uFE0F', '\\u200d', '👨']`, are
+supported, though this function cannot determine whether a given sequence of emoji
+and zero-width joiners would result in a valid composite emoji.
+
+Malformed characters, and strings that contain them, are never emoji.
+
+# Examples
+```jldoctest
+julia> Unicode.isemoji("👨\\u200d❤\\uFE0F\\u200d👨")
+true
+
+julia> Unicode.isemoji('👨')
+true
+
+julia> Unicode.isemoji('A')
+false
+```
+"""
+function isemoji(c::AbstractChar)
+    # Like Base's character predicates, treat a malformed or overlong character as
+    # "not an emoji" instead of letting the `UInt32` conversion throw.
+    isvalid(c) || return false
+    return isemoji_code(UInt32(c))
+end
+
+isemoji(s::AbstractString) = _isemoji(s)
+
+# Return `true` if code point `u` lies in any of the ranges listed in `EMOJI_RANGES`.
+isemoji_code(u::UInt32) = any(r -> u in r, EMOJI_RANGES)
+
+# Return `true` if code point `u` can start a keycap sequence: '0'-'9', '#' or '*'.
+iskeycapbase(u::UInt32) = u in 0x00000030:0x00000039 || u == 0x00000023 || u == 0x0000002a
+
+# a single emoji comprises one of several patterns
+# 1. a single character which is a basic emoji
+# 2. a pair of two regional indicator flags
+# 3. a single emoji character with a modifier character (FE0F or skin color after it)
+# 4. a pound sign, asterisk, or digit followed by FE0F 20E3 or just 20E3
+# 5. a single regional indicator and then 6 small letter characters
+#    (subnational flags, currently only england, scotland and wales)
+# 6. Several emojis separated by zero-width joiners
+#
+# `ZWJ_allowed = false` is passed for the pieces between joiners, so that pattern 6
+# is only applied at the top level.
+function _isemoji(s::AbstractString; ZWJ_allowed = true)
+    isempty(s) && return false
+    # `codepoint` throws on malformed characters; such a string is not an emoji
+    all(isvalid, s) || return false
+    codepoints = UInt32[codepoint(c) for c in s]
+    L = length(codepoints)
+    if L == 1
+        # single character emoji (pattern 1)
+        return isemoji_code(codepoints[1])
+    elseif L == 2
+        if codepoints[1] in REGIONAL_INDICATORS && codepoints[2] in REGIONAL_INDICATORS
+            # country flag (pattern 2)
+            return true
+        elseif isemoji_code(codepoints[1])
+            # skin color or FE0F modifier (pattern 3)
+            return codepoints[2] in SKIN_COLORS || codepoints[2] == 0x0FE0F
+        else
+            # keycap without the FE0F variation selector (pattern 4)
+            return iskeycapbase(codepoints[1]) && codepoints[2] == 0x020E3
+        end
+    elseif L == 3 && iskeycapbase(codepoints[1])
+        # keycap with the FE0F variation selector (pattern 4)
+        return codepoints[2] == 0x0FE0F && codepoints[3] == 0x020E3
+    elseif L == 7 && codepoints[1] == 0x1F3F4 &&
+           all(in(0xE0062:0xE007F), view(codepoints, 2:L))
+        # England, Scotland and Wales (pattern 5)
+        return true
+    elseif ZWJ_allowed
+        # zero-width join patterns (pattern 6); the parentheses are required because
+        # `&&` binds tighter than `||`
+        (first(s) == ZWJ || last(s) == ZWJ) && return false
+        return all(x -> _isemoji(x; ZWJ_allowed = false), split(s, ZWJ))
+    end
+    return false
+end
 
 end
diff --git a/stdlib/Unicode/src/emoji_ranges.jl b/stdlib/Unicode/src/emoji_ranges.jl new file mode 100644 index 0000000000000..d62d73da1e887 --- /dev/null +++ b/stdlib/Unicode/src/emoji_ranges.jl @@ -0,0 +1,696 @@ +const EMOJI_RANGES = [ + 0x000000a9:0x000000a9, + 0x000000ae:0x000000ae, +
0x0000203c:0x0000203c, + 0x00002049:0x00002049, + 0x00002122:0x00002122, + 0x00002139:0x00002139, + 0x00002194:0x00002199, + 0x000021a9:0x000021aa, + 0x0000231a:0x0000231b, + 0x00002328:0x00002328, + 0x000023cf:0x000023cf, + 0x000023e9:0x000023ec, + 0x000023ed:0x000023ee, + 0x000023ef:0x000023ef, + 0x000023f0:0x000023f0, + 0x000023f1:0x000023f2, + 0x000023f3:0x000023f3, + 0x000023f8:0x000023fa, + 0x000024c2:0x000024c2, + 0x000025aa:0x000025ab, + 0x000025b6:0x000025b6, + 0x000025c0:0x000025c0, + 0x000025fb:0x000025fe, + 0x00002600:0x00002601, + 0x00002602:0x00002603, + 0x00002604:0x00002604, + 0x0000260e:0x0000260e, + 0x00002611:0x00002611, + 0x00002614:0x00002615, + 0x00002618:0x00002618, + 0x0000261d:0x0000261d, + 0x00002620:0x00002620, + 0x00002622:0x00002623, + 0x00002626:0x00002626, + 0x0000262a:0x0000262a, + 0x0000262e:0x0000262e, + 0x0000262f:0x0000262f, + 0x00002638:0x00002639, + 0x0000263a:0x0000263a, + 0x00002640:0x00002640, + 0x00002642:0x00002642, + 0x00002648:0x00002653, + 0x0000265f:0x0000265f, + 0x00002660:0x00002660, + 0x00002663:0x00002663, + 0x00002665:0x00002666, + 0x00002668:0x00002668, + 0x0000267b:0x0000267b, + 0x0000267e:0x0000267e, + 0x0000267f:0x0000267f, + 0x00002692:0x00002692, + 0x00002693:0x00002693, + 0x00002694:0x00002694, + 0x00002695:0x00002695, + 0x00002696:0x00002697, + 0x00002699:0x00002699, + 0x0000269b:0x0000269c, + 0x000026a0:0x000026a1, + 0x000026a7:0x000026a7, + 0x000026aa:0x000026ab, + 0x000026b0:0x000026b1, + 0x000026bd:0x000026be, + 0x000026c4:0x000026c5, + 0x000026c8:0x000026c8, + 0x000026ce:0x000026ce, + 0x000026cf:0x000026cf, + 0x000026d1:0x000026d1, + 0x000026d3:0x000026d3, + 0x000026d4:0x000026d4, + 0x000026e9:0x000026e9, + 0x000026ea:0x000026ea, + 0x000026f0:0x000026f1, + 0x000026f2:0x000026f3, + 0x000026f4:0x000026f4, + 0x000026f5:0x000026f5, + 0x000026f7:0x000026f9, + 0x000026fa:0x000026fa, + 0x000026fd:0x000026fd, + 0x00002702:0x00002702, + 0x00002705:0x00002705, + 0x00002708:0x0000270c, + 0x0000270d:0x0000270d, + 
0x0000270f:0x0000270f, + 0x00002712:0x00002712, + 0x00002714:0x00002714, + 0x00002716:0x00002716, + 0x0000271d:0x0000271d, + 0x00002721:0x00002721, + 0x00002728:0x00002728, + 0x00002733:0x00002734, + 0x00002744:0x00002744, + 0x00002747:0x00002747, + 0x0000274c:0x0000274c, + 0x0000274e:0x0000274e, + 0x00002753:0x00002755, + 0x00002757:0x00002757, + 0x00002763:0x00002763, + 0x00002764:0x00002764, + 0x00002795:0x00002797, + 0x000027a1:0x000027a1, + 0x000027b0:0x000027b0, + 0x000027bf:0x000027bf, + 0x00002934:0x00002935, + 0x00002b05:0x00002b07, + 0x00002b1b:0x00002b1c, + 0x00002b50:0x00002b50, + 0x00002b55:0x00002b55, + 0x00003030:0x00003030, + 0x0000303d:0x0000303d, + 0x00003297:0x00003297, + 0x00003299:0x00003299, + 0x0001f004:0x0001f004, + 0x0001f0cf:0x0001f0cf, + 0x0001f170:0x0001f171, + 0x0001f17e:0x0001f17f, + 0x0001f18e:0x0001f18e, + 0x0001f191:0x0001f19a, + 0x0001f1e6:0x0001f1ff, + 0x0001f201:0x0001f202, + 0x0001f21a:0x0001f21a, + 0x0001f22f:0x0001f22f, + 0x0001f232:0x0001f23a, + 0x0001f250:0x0001f251, + 0x0001f300:0x0001f30c, + 0x0001f30d:0x0001f30e, + 0x0001f30f:0x0001f30f, + 0x0001f310:0x0001f310, + 0x0001f311:0x0001f311, + 0x0001f312:0x0001f312, + 0x0001f313:0x0001f315, + 0x0001f316:0x0001f318, + 0x0001f319:0x0001f319, + 0x0001f31a:0x0001f31a, + 0x0001f31b:0x0001f31b, + 0x0001f31c:0x0001f31c, + 0x0001f31d:0x0001f31e, + 0x0001f31f:0x0001f320, + 0x0001f321:0x0001f321, + 0x0001f324:0x0001f32c, + 0x0001f32d:0x0001f32f, + 0x0001f330:0x0001f331, + 0x0001f332:0x0001f333, + 0x0001f334:0x0001f335, + 0x0001f336:0x0001f336, + 0x0001f337:0x0001f34a, + 0x0001f34b:0x0001f34b, + 0x0001f34c:0x0001f34f, + 0x0001f350:0x0001f350, + 0x0001f351:0x0001f37b, + 0x0001f37c:0x0001f37c, + 0x0001f37d:0x0001f37d, + 0x0001f37e:0x0001f37f, + 0x0001f380:0x0001f393, + 0x0001f396:0x0001f397, + 0x0001f399:0x0001f39b, + 0x0001f39e:0x0001f39f, + 0x0001f3a0:0x0001f3c4, + 0x0001f3c5:0x0001f3c5, + 0x0001f3c6:0x0001f3c6, + 0x0001f3c7:0x0001f3c7, + 0x0001f3c8:0x0001f3c8, + 0x0001f3c9:0x0001f3c9, + 
0x0001f3ca:0x0001f3ca, + 0x0001f3cb:0x0001f3ce, + 0x0001f3cf:0x0001f3d3, + 0x0001f3d4:0x0001f3df, + 0x0001f3e0:0x0001f3e3, + 0x0001f3e4:0x0001f3e4, + 0x0001f3e5:0x0001f3f0, + 0x0001f3f3:0x0001f3f3, + 0x0001f3f4:0x0001f3f4, + 0x0001f3f5:0x0001f3f5, + 0x0001f3f7:0x0001f3f7, + 0x0001f3f8:0x0001f407, + 0x0001f408:0x0001f408, + 0x0001f409:0x0001f40b, + 0x0001f40c:0x0001f40e, + 0x0001f40f:0x0001f410, + 0x0001f411:0x0001f412, + 0x0001f413:0x0001f413, + 0x0001f414:0x0001f414, + 0x0001f415:0x0001f415, + 0x0001f416:0x0001f416, + 0x0001f417:0x0001f429, + 0x0001f42a:0x0001f42a, + 0x0001f42b:0x0001f43e, + 0x0001f43f:0x0001f43f, + 0x0001f440:0x0001f440, + 0x0001f441:0x0001f441, + 0x0001f442:0x0001f464, + 0x0001f465:0x0001f465, + 0x0001f466:0x0001f46b, + 0x0001f46c:0x0001f46d, + 0x0001f46e:0x0001f4ac, + 0x0001f4ad:0x0001f4ad, + 0x0001f4ae:0x0001f4b5, + 0x0001f4b6:0x0001f4b7, + 0x0001f4b8:0x0001f4eb, + 0x0001f4ec:0x0001f4ed, + 0x0001f4ee:0x0001f4ee, + 0x0001f4ef:0x0001f4ef, + 0x0001f4f0:0x0001f4f4, + 0x0001f4f5:0x0001f4f5, + 0x0001f4f6:0x0001f4f7, + 0x0001f4f8:0x0001f4f8, + 0x0001f4f9:0x0001f4fc, + 0x0001f4fd:0x0001f4fd, + 0x0001f4ff:0x0001f502, + 0x0001f503:0x0001f503, + 0x0001f504:0x0001f507, + 0x0001f508:0x0001f508, + 0x0001f509:0x0001f509, + 0x0001f50a:0x0001f514, + 0x0001f515:0x0001f515, + 0x0001f516:0x0001f52b, + 0x0001f52c:0x0001f52d, + 0x0001f52e:0x0001f53d, + 0x0001f549:0x0001f54a, + 0x0001f54b:0x0001f54e, + 0x0001f550:0x0001f55b, + 0x0001f55c:0x0001f567, + 0x0001f56f:0x0001f570, + 0x0001f573:0x0001f579, + 0x0001f57a:0x0001f57a, + 0x0001f587:0x0001f587, + 0x0001f58a:0x0001f58d, + 0x0001f590:0x0001f590, + 0x0001f595:0x0001f596, + 0x0001f5a4:0x0001f5a4, + 0x0001f5a5:0x0001f5a5, + 0x0001f5a8:0x0001f5a8, + 0x0001f5b1:0x0001f5b2, + 0x0001f5bc:0x0001f5bc, + 0x0001f5c2:0x0001f5c4, + 0x0001f5d1:0x0001f5d3, + 0x0001f5dc:0x0001f5de, + 0x0001f5e1:0x0001f5e1, + 0x0001f5e3:0x0001f5e3, + 0x0001f5e8:0x0001f5e8, + 0x0001f5ef:0x0001f5ef, + 0x0001f5f3:0x0001f5f3, + 0x0001f5fa:0x0001f5fa, + 
0x0001f5fb:0x0001f5ff, + 0x0001f600:0x0001f600, + 0x0001f601:0x0001f606, + 0x0001f607:0x0001f608, + 0x0001f609:0x0001f60d, + 0x0001f60e:0x0001f60e, + 0x0001f60f:0x0001f60f, + 0x0001f610:0x0001f610, + 0x0001f611:0x0001f611, + 0x0001f612:0x0001f614, + 0x0001f615:0x0001f615, + 0x0001f616:0x0001f616, + 0x0001f617:0x0001f617, + 0x0001f618:0x0001f618, + 0x0001f619:0x0001f619, + 0x0001f61a:0x0001f61a, + 0x0001f61b:0x0001f61b, + 0x0001f61c:0x0001f61e, + 0x0001f61f:0x0001f61f, + 0x0001f620:0x0001f625, + 0x0001f626:0x0001f627, + 0x0001f628:0x0001f62b, + 0x0001f62c:0x0001f62c, + 0x0001f62d:0x0001f62d, + 0x0001f62e:0x0001f62f, + 0x0001f630:0x0001f633, + 0x0001f634:0x0001f634, + 0x0001f635:0x0001f635, + 0x0001f636:0x0001f636, + 0x0001f637:0x0001f640, + 0x0001f641:0x0001f644, + 0x0001f645:0x0001f64f, + 0x0001f680:0x0001f680, + 0x0001f681:0x0001f682, + 0x0001f683:0x0001f685, + 0x0001f686:0x0001f686, + 0x0001f687:0x0001f687, + 0x0001f688:0x0001f688, + 0x0001f689:0x0001f689, + 0x0001f68a:0x0001f68b, + 0x0001f68c:0x0001f68c, + 0x0001f68d:0x0001f68d, + 0x0001f68e:0x0001f68e, + 0x0001f68f:0x0001f68f, + 0x0001f690:0x0001f690, + 0x0001f691:0x0001f693, + 0x0001f694:0x0001f694, + 0x0001f695:0x0001f695, + 0x0001f696:0x0001f696, + 0x0001f697:0x0001f697, + 0x0001f698:0x0001f698, + 0x0001f699:0x0001f69a, + 0x0001f69b:0x0001f6a1, + 0x0001f6a2:0x0001f6a2, + 0x0001f6a3:0x0001f6a3, + 0x0001f6a4:0x0001f6a5, + 0x0001f6a6:0x0001f6a6, + 0x0001f6a7:0x0001f6ad, + 0x0001f6ae:0x0001f6b1, + 0x0001f6b2:0x0001f6b2, + 0x0001f6b3:0x0001f6b5, + 0x0001f6b6:0x0001f6b6, + 0x0001f6b7:0x0001f6b8, + 0x0001f6b9:0x0001f6be, + 0x0001f6bf:0x0001f6bf, + 0x0001f6c0:0x0001f6c0, + 0x0001f6c1:0x0001f6c5, + 0x0001f6cb:0x0001f6cb, + 0x0001f6cc:0x0001f6cc, + 0x0001f6cd:0x0001f6cf, + 0x0001f6d0:0x0001f6d0, + 0x0001f6d1:0x0001f6d2, + 0x0001f6d5:0x0001f6d5, + 0x0001f6d6:0x0001f6d7, + 0x0001f6e0:0x0001f6e5, + 0x0001f6e9:0x0001f6e9, + 0x0001f6eb:0x0001f6ec, + 0x0001f6f0:0x0001f6f0, + 0x0001f6f3:0x0001f6f3, + 0x0001f6f4:0x0001f6f6, + 
0x0001f6f7:0x0001f6f8, + 0x0001f6f9:0x0001f6f9, + 0x0001f6fa:0x0001f6fa, + 0x0001f6fb:0x0001f6fc, + 0x0001f7e0:0x0001f7eb, + 0x0001f90c:0x0001f90c, + 0x0001f90d:0x0001f90f, + 0x0001f910:0x0001f918, + 0x0001f919:0x0001f91e, + 0x0001f91f:0x0001f91f, + 0x0001f920:0x0001f927, + 0x0001f928:0x0001f92f, + 0x0001f930:0x0001f930, + 0x0001f931:0x0001f932, + 0x0001f933:0x0001f93a, + 0x0001f93c:0x0001f93e, + 0x0001f93f:0x0001f93f, + 0x0001f940:0x0001f945, + 0x0001f947:0x0001f94b, + 0x0001f94c:0x0001f94c, + 0x0001f94d:0x0001f94f, + 0x0001f950:0x0001f95e, + 0x0001f95f:0x0001f96b, + 0x0001f96c:0x0001f970, + 0x0001f971:0x0001f971, + 0x0001f972:0x0001f972, + 0x0001f973:0x0001f976, + 0x0001f977:0x0001f978, + 0x0001f97a:0x0001f97a, + 0x0001f97b:0x0001f97b, + 0x0001f97c:0x0001f97f, + 0x0001f980:0x0001f984, + 0x0001f985:0x0001f991, + 0x0001f992:0x0001f997, + 0x0001f998:0x0001f9a2, + 0x0001f9a3:0x0001f9a4, + 0x0001f9a5:0x0001f9aa, + 0x0001f9ab:0x0001f9ad, + 0x0001f9ae:0x0001f9af, + 0x0001f9b0:0x0001f9b9, + 0x0001f9ba:0x0001f9bf, + 0x0001f9c0:0x0001f9c0, + 0x0001f9c1:0x0001f9c2, + 0x0001f9c3:0x0001f9ca, + 0x0001f9cb:0x0001f9cb, + 0x0001f9cd:0x0001f9cf, + 0x0001f9d0:0x0001f9e6, + 0x0001f9e7:0x0001f9ff, + 0x0001fa70:0x0001fa73, + 0x0001fa74:0x0001fa74, + 0x0001fa78:0x0001fa7a, + 0x0001fa80:0x0001fa82, + 0x0001fa83:0x0001fa86, + 0x0001fa90:0x0001fa95, + 0x0001fa96:0x0001faa8, + 0x0001fab0:0x0001fab6, + 0x0001fac0:0x0001fac2, + 0x0001fad0:0x0001fad6, + 0x0000231a:0x0000231b, + 0x000023e9:0x000023ec, + 0x000023f0:0x000023f0, + 0x000023f3:0x000023f3, + 0x000025fd:0x000025fe, + 0x00002614:0x00002615, + 0x00002648:0x00002653, + 0x0000267f:0x0000267f, + 0x00002693:0x00002693, + 0x000026a1:0x000026a1, + 0x000026aa:0x000026ab, + 0x000026bd:0x000026be, + 0x000026c4:0x000026c5, + 0x000026ce:0x000026ce, + 0x000026d4:0x000026d4, + 0x000026ea:0x000026ea, + 0x000026f2:0x000026f3, + 0x000026f5:0x000026f5, + 0x000026fa:0x000026fa, + 0x000026fd:0x000026fd, + 0x00002705:0x00002705, + 0x0000270a:0x0000270b, + 
0x00002728:0x00002728, + 0x0000274c:0x0000274c, + 0x0000274e:0x0000274e, + 0x00002753:0x00002755, + 0x00002757:0x00002757, + 0x00002795:0x00002797, + 0x000027b0:0x000027b0, + 0x000027bf:0x000027bf, + 0x00002b1b:0x00002b1c, + 0x00002b50:0x00002b50, + 0x00002b55:0x00002b55, + 0x0001f004:0x0001f004, + 0x0001f0cf:0x0001f0cf, + 0x0001f18e:0x0001f18e, + 0x0001f191:0x0001f19a, + 0x0001f1e6:0x0001f1ff, + 0x0001f201:0x0001f201, + 0x0001f21a:0x0001f21a, + 0x0001f22f:0x0001f22f, + 0x0001f232:0x0001f236, + 0x0001f238:0x0001f23a, + 0x0001f250:0x0001f251, + 0x0001f300:0x0001f30c, + 0x0001f30d:0x0001f30e, + 0x0001f30f:0x0001f30f, + 0x0001f310:0x0001f310, + 0x0001f311:0x0001f311, + 0x0001f312:0x0001f312, + 0x0001f313:0x0001f315, + 0x0001f316:0x0001f318, + 0x0001f319:0x0001f319, + 0x0001f31a:0x0001f31a, + 0x0001f31b:0x0001f31b, + 0x0001f31c:0x0001f31c, + 0x0001f31d:0x0001f31e, + 0x0001f31f:0x0001f320, + 0x0001f32d:0x0001f32f, + 0x0001f330:0x0001f331, + 0x0001f332:0x0001f333, + 0x0001f334:0x0001f335, + 0x0001f337:0x0001f34a, + 0x0001f34b:0x0001f34b, + 0x0001f34c:0x0001f34f, + 0x0001f350:0x0001f350, + 0x0001f351:0x0001f37b, + 0x0001f37c:0x0001f37c, + 0x0001f37e:0x0001f37f, + 0x0001f380:0x0001f393, + 0x0001f3a0:0x0001f3c4, + 0x0001f3c5:0x0001f3c5, + 0x0001f3c6:0x0001f3c6, + 0x0001f3c7:0x0001f3c7, + 0x0001f3c8:0x0001f3c8, + 0x0001f3c9:0x0001f3c9, + 0x0001f3ca:0x0001f3ca, + 0x0001f3cf:0x0001f3d3, + 0x0001f3e0:0x0001f3e3, + 0x0001f3e4:0x0001f3e4, + 0x0001f3e5:0x0001f3f0, + 0x0001f3f4:0x0001f3f4, + 0x0001f3f8:0x0001f407, + 0x0001f408:0x0001f408, + 0x0001f409:0x0001f40b, + 0x0001f40c:0x0001f40e, + 0x0001f40f:0x0001f410, + 0x0001f411:0x0001f412, + 0x0001f413:0x0001f413, + 0x0001f414:0x0001f414, + 0x0001f415:0x0001f415, + 0x0001f416:0x0001f416, + 0x0001f417:0x0001f429, + 0x0001f42a:0x0001f42a, + 0x0001f42b:0x0001f43e, + 0x0001f440:0x0001f440, + 0x0001f442:0x0001f464, + 0x0001f465:0x0001f465, + 0x0001f466:0x0001f46b, + 0x0001f46c:0x0001f46d, + 0x0001f46e:0x0001f4ac, + 0x0001f4ad:0x0001f4ad, + 
0x0001f4ae:0x0001f4b5, + 0x0001f4b6:0x0001f4b7, + 0x0001f4b8:0x0001f4eb, + 0x0001f4ec:0x0001f4ed, + 0x0001f4ee:0x0001f4ee, + 0x0001f4ef:0x0001f4ef, + 0x0001f4f0:0x0001f4f4, + 0x0001f4f5:0x0001f4f5, + 0x0001f4f6:0x0001f4f7, + 0x0001f4f8:0x0001f4f8, + 0x0001f4f9:0x0001f4fc, + 0x0001f4ff:0x0001f502, + 0x0001f503:0x0001f503, + 0x0001f504:0x0001f507, + 0x0001f508:0x0001f508, + 0x0001f509:0x0001f509, + 0x0001f50a:0x0001f514, + 0x0001f515:0x0001f515, + 0x0001f516:0x0001f52b, + 0x0001f52c:0x0001f52d, + 0x0001f52e:0x0001f53d, + 0x0001f54b:0x0001f54e, + 0x0001f550:0x0001f55b, + 0x0001f55c:0x0001f567, + 0x0001f57a:0x0001f57a, + 0x0001f595:0x0001f596, + 0x0001f5a4:0x0001f5a4, + 0x0001f5fb:0x0001f5ff, + 0x0001f600:0x0001f600, + 0x0001f601:0x0001f606, + 0x0001f607:0x0001f608, + 0x0001f609:0x0001f60d, + 0x0001f60e:0x0001f60e, + 0x0001f60f:0x0001f60f, + 0x0001f610:0x0001f610, + 0x0001f611:0x0001f611, + 0x0001f612:0x0001f614, + 0x0001f615:0x0001f615, + 0x0001f616:0x0001f616, + 0x0001f617:0x0001f617, + 0x0001f618:0x0001f618, + 0x0001f619:0x0001f619, + 0x0001f61a:0x0001f61a, + 0x0001f61b:0x0001f61b, + 0x0001f61c:0x0001f61e, + 0x0001f61f:0x0001f61f, + 0x0001f620:0x0001f625, + 0x0001f626:0x0001f627, + 0x0001f628:0x0001f62b, + 0x0001f62c:0x0001f62c, + 0x0001f62d:0x0001f62d, + 0x0001f62e:0x0001f62f, + 0x0001f630:0x0001f633, + 0x0001f634:0x0001f634, + 0x0001f635:0x0001f635, + 0x0001f636:0x0001f636, + 0x0001f637:0x0001f640, + 0x0001f641:0x0001f644, + 0x0001f645:0x0001f64f, + 0x0001f680:0x0001f680, + 0x0001f681:0x0001f682, + 0x0001f683:0x0001f685, + 0x0001f686:0x0001f686, + 0x0001f687:0x0001f687, + 0x0001f688:0x0001f688, + 0x0001f689:0x0001f689, + 0x0001f68a:0x0001f68b, + 0x0001f68c:0x0001f68c, + 0x0001f68d:0x0001f68d, + 0x0001f68e:0x0001f68e, + 0x0001f68f:0x0001f68f, + 0x0001f690:0x0001f690, + 0x0001f691:0x0001f693, + 0x0001f694:0x0001f694, + 0x0001f695:0x0001f695, + 0x0001f696:0x0001f696, + 0x0001f697:0x0001f697, + 0x0001f698:0x0001f698, + 0x0001f699:0x0001f69a, + 0x0001f69b:0x0001f6a1, + 
0x0001f6a2:0x0001f6a2, + 0x0001f6a3:0x0001f6a3, + 0x0001f6a4:0x0001f6a5, + 0x0001f6a6:0x0001f6a6, + 0x0001f6a7:0x0001f6ad, + 0x0001f6ae:0x0001f6b1, + 0x0001f6b2:0x0001f6b2, + 0x0001f6b3:0x0001f6b5, + 0x0001f6b6:0x0001f6b6, + 0x0001f6b7:0x0001f6b8, + 0x0001f6b9:0x0001f6be, + 0x0001f6bf:0x0001f6bf, + 0x0001f6c0:0x0001f6c0, + 0x0001f6c1:0x0001f6c5, + 0x0001f6cc:0x0001f6cc, + 0x0001f6d0:0x0001f6d0, + 0x0001f6d1:0x0001f6d2, + 0x0001f6d5:0x0001f6d5, + 0x0001f6d6:0x0001f6d7, + 0x0001f6eb:0x0001f6ec, + 0x0001f6f4:0x0001f6f6, + 0x0001f6f7:0x0001f6f8, + 0x0001f6f9:0x0001f6f9, + 0x0001f6fa:0x0001f6fa, + 0x0001f6fb:0x0001f6fc, + 0x0001f7e0:0x0001f7eb, + 0x0001f90c:0x0001f90c, + 0x0001f90d:0x0001f90f, + 0x0001f910:0x0001f918, + 0x0001f919:0x0001f91e, + 0x0001f91f:0x0001f91f, + 0x0001f920:0x0001f927, + 0x0001f928:0x0001f92f, + 0x0001f930:0x0001f930, + 0x0001f931:0x0001f932, + 0x0001f933:0x0001f93a, + 0x0001f93c:0x0001f93e, + 0x0001f93f:0x0001f93f, + 0x0001f940:0x0001f945, + 0x0001f947:0x0001f94b, + 0x0001f94c:0x0001f94c, + 0x0001f94d:0x0001f94f, + 0x0001f950:0x0001f95e, + 0x0001f95f:0x0001f96b, + 0x0001f96c:0x0001f970, + 0x0001f971:0x0001f971, + 0x0001f972:0x0001f972, + 0x0001f973:0x0001f976, + 0x0001f977:0x0001f978, + 0x0001f97a:0x0001f97a, + 0x0001f97b:0x0001f97b, + 0x0001f97c:0x0001f97f, + 0x0001f980:0x0001f984, + 0x0001f985:0x0001f991, + 0x0001f992:0x0001f997, + 0x0001f998:0x0001f9a2, + 0x0001f9a3:0x0001f9a4, + 0x0001f9a5:0x0001f9aa, + 0x0001f9ab:0x0001f9ad, + 0x0001f9ae:0x0001f9af, + 0x0001f9b0:0x0001f9b9, + 0x0001f9ba:0x0001f9bf, + 0x0001f9c0:0x0001f9c0, + 0x0001f9c1:0x0001f9c2, + 0x0001f9c3:0x0001f9ca, + 0x0001f9cb:0x0001f9cb, + 0x0001f9cd:0x0001f9cf, + 0x0001f9d0:0x0001f9e6, + 0x0001f9e7:0x0001f9ff, + 0x0001fa70:0x0001fa73, + 0x0001fa74:0x0001fa74, + 0x0001fa78:0x0001fa7a, + 0x0001fa80:0x0001fa82, + 0x0001fa83:0x0001fa86, + 0x0001fa90:0x0001fa95, + 0x0001fa96:0x0001faa8, + 0x0001fab0:0x0001fab6, + 0x0001fac0:0x0001fac2, + 0x0001fad0:0x0001fad6, + 0x0001f3fb:0x0001f3ff, + 
0x0000261d:0x0000261d, + 0x000026f9:0x000026f9, + 0x0000270a:0x0000270c, + 0x0000270d:0x0000270d, + 0x0001f385:0x0001f385, + 0x0001f3c2:0x0001f3c4, + 0x0001f3c7:0x0001f3c7, + 0x0001f3ca:0x0001f3ca, + 0x0001f3cb:0x0001f3cc, + 0x0001f442:0x0001f443, + 0x0001f446:0x0001f450, + 0x0001f466:0x0001f46b, + 0x0001f46c:0x0001f46d, + 0x0001f46e:0x0001f478, + 0x0001f47c:0x0001f47c, + 0x0001f481:0x0001f483, + 0x0001f485:0x0001f487, + 0x0001f48f:0x0001f48f, + 0x0001f491:0x0001f491, + 0x0001f4aa:0x0001f4aa, + 0x0001f574:0x0001f575, + 0x0001f57a:0x0001f57a, + 0x0001f590:0x0001f590, + 0x0001f595:0x0001f596, + 0x0001f645:0x0001f647, + 0x0001f64b:0x0001f64f, + 0x0001f6a3:0x0001f6a3, + 0x0001f6b4:0x0001f6b5, + 0x0001f6b6:0x0001f6b6, + 0x0001f6c0:0x0001f6c0, + 0x0001f6cc:0x0001f6cc, + 0x0001f90c:0x0001f90c, + 0x0001f90f:0x0001f90f, + 0x0001f918:0x0001f918, + 0x0001f919:0x0001f91e, + 0x0001f91f:0x0001f91f, + 0x0001f926:0x0001f926, + 0x0001f930:0x0001f930, + 0x0001f931:0x0001f932, + 0x0001f933:0x0001f939, + 0x0001f93c:0x0001f93e, + 0x0001f977:0x0001f977, + 0x0001f9b5:0x0001f9b6, + 0x0001f9b8:0x0001f9b9, + 0x0001f9bb:0x0001f9bb, + 0x0001f9cd:0x0001f9cf, + 0x0001f9d1:0x0001f9dd, + 0x000020e3:0x000020e3, + 0x0001f1e6:0x0001f1ff, + 0x0001f3fb:0x0001f3ff, + 0x0001f9b0:0x0001f9b3, + 0x000e0020:0x000e007f, +]
diff --git a/stdlib/Unicode/test/runtests.jl b/stdlib/Unicode/test/runtests.jl
index d84137de11054..8131aa2109d6d 100644
--- a/stdlib/Unicode/test/runtests.jl
+++ b/stdlib/Unicode/test/runtests.jl
@@ -3,6 +3,7 @@
 using Test
 using Unicode
 using Unicode: normalize, isassigned
+using Downloads
 
 @testset "string normalization" begin
     # normalize (Unicode normalization etc.):
@@ -404,3 +405,86 @@ end
     @test prod(["*" for i in 1:3]) == "***"
     @test prod(["*" for i in 1:0]) == ""
 end
+
+@testset "Emoji tests" begin
+
+    # Read column `column` of a `;`-separated Unicode emoji data file (e.g.
+    # https://www.unicode.org/Public/emoji/13.1/emoji-sequences.txt), skipping empty
+    # and comment lines. Only rows whose second column starts with `type_field`
+    # are kept.
+    function extract_emoji_column(emoji_data, column = 1; type_field = "")
+        lines = readlines(emoji_data)
+        filter!(line -> !isempty(line) && !startswith(line, "#"), lines)
+        fields = [strip.(split(line, ";")) for line in lines]
+        return [f[column] for f in fields if startswith(f[2], type_field)]
+    end
+
+    # parse a string of the form "AAAA..FFFF" (or just "AAAA") into 0xAAAA:0xFFFF
+    function parse_unicode_range_str(range_str)
+        bounds = split(range_str, "..")
+        length(bounds) in 1:2 || return nothing
+        lo = tryparse(UInt32, first(bounds); base = 16)
+        lo === nothing && return nothing
+        hi = tryparse(UInt32, last(bounds); base = 16)
+        hi === nothing && return nothing
+        return lo:hi
+    end
+
+    # parse a string of the form "AAAA BBBB CCCC" into [0xAAAA, 0xBBBB, 0xCCCC];
+    # returns `nothing` unless every token is a hexadecimal code point
+    function parse_sequence_str(seq_str)
+        res = tryparse.(UInt32, split(seq_str); base = 16)
+        return (isempty(res) || any(isnothing, res)) ? nothing : res
+    end
+
+    # Parse a string containing a range (i.e. AAAA..FFFF) or a sequence (AAAA BBBB CCCC)
+    # of unicode codepoints into an array of strings.
+    # Ranges are parsed as independent characters: "AAAA..AAAC" -> ["\uAAAA", "\uAAAB", "\uAAAC"]
+    # Sequences are parsed as a single string: "AAAA BBBB" -> ["\uAAAA\uBBBB"]
+    function parse_col_entry(seq_str)
+        seq = parse_sequence_str(seq_str)
+        seq === nothing || return [String(Char.(seq))]
+        return string.(Char.(parse_unicode_range_str(seq_str)))
+    end
+
+    function extract_emoji_sequences(emoji_data; type_field = "")
+        entries = extract_emoji_column(emoji_data; type_field)
+        isempty(entries) && return String[]
+        # `reduce(vcat, ...)` instead of splatting one argument per data line
+        return reduce(vcat, parse_col_entry.(entries))
+    end
+
+    # See if all emojis are caught by the isemoji function
+    # NOTE(review): this needs network access to unicode.org while the tests run;
+    # consider vendoring the two data files.
+    emoji_sequences = Downloads.download("https://www.unicode.org/Public/emoji/13.1/emoji-sequences.txt")
+    emoji_zwj_sequences = Downloads.download("https://www.unicode.org/Public/emoji/13.1/emoji-zwj-sequences.txt")
+
+    all_emojis = [
+        extract_emoji_sequences(emoji_sequences);
+        extract_emoji_sequences(emoji_zwj_sequences);
+    ]
+    # comparing with an empty vector makes a failure print the offending sequences
+    @test filter(!isemoji, all_emojis) == String[]
+    @test !isemoji('A')
+    @test !isemoji("🔹 some text bounded by emojis 🔹")
+    @test !isemoji("🚁 some text after an emoji")
+    @test !isemoji("some text before an emoji 🚘")
+    @test !isemoji("😮 😥 😨 😩 😪")
+    @test !isemoji("No emojis here")
+
+    # Test emoji sequences (invisible joiners and selectors are written as escapes)
+    @test !isemoji("😈😘")
+    @test isemoji("🚴🏿")
+    @test !isemoji("👨\u200d👧" * Unicode.ZWJ)
+    @test isemoji("🛌" * Unicode.ZWJ * '😎')
+    @test !isemoji("🤦🏽" * Unicode.ZWJ * Unicode.ZWJ * '😎')
+    @test !isemoji("")
+    @test isemoji("\U1F1EB\U1F1F7") # flag: two regional indicators
+    @test isemoji("1\uFE0F\u20E3") # keycap
+    @test isemoji("\U1F3F4\UE0067\UE0062\UE0065\UE006E\UE0067\UE007F") # tag sequence
+
+    teststring = "My family looks like this: 👩\u200d👩\u200d👦\u200d👦 and I ❤\uFE0F them"
+    @test filter(isemoji, collect(graphemes(teststring))) ==
+        ["👩\u200d👩\u200d👦\u200d👦", "❤\uFE0F"]
+end