In [110]:
# There is a lot of duplication in the top256 most popular emojis - both visually and semantically.
# In mediums where an emoji is used, there is a narrower band of emotions expressed when compared to the full
# range of human emotions. By choosing less popular emojis, we can expand the range of expressiveness.

# These are the *loose* considerations for choosing/removing emojis
# - Popular in usage
# - Visually unique
# - Emotionally rich
# - Distinct. When two emojis are similar - choose the most powerful, most visually unique, or most popular.
# - Culturally ubiquitous (to a degree; i.e. it is impossible to avoid culture when talking about food.)
# - Non-religious/non-political. Avoid religious symbols and flags.
# - Useful as a medium for storytelling (e.g. focus on faces and emotions)
# - b'\0' should visually stand out. Emojis are sorted lexicographically.
# - Prefer animal whole-body silhouettes

# This was done manually. Below are scripts to help preview changes and spot duplicates.

# Load the working selection. The file may hold one emoji per line or several
# whitespace-separated emojis per line; both layouts flatten to a single list.
with open('selection.txt', 'r', encoding='utf-8') as selection_file:
    lines = list(selection_file)

# Every whitespace-delimited token on every line is treated as one emoji.
emojis = [token for raw_line in lines for token in raw_line.strip().split()]

print(emojis)

In [111]:
# Sort the shared list in place so that every later use of `emojis` sees the
# same lexicographic ordering.
emojis.sort()

# Rewrite the selection file in sorted order, one emoji per line.
with open('selection.txt', 'w', encoding='utf-8') as selection_file:
    selection_file.writelines(emo + '\n' for emo in emojis)

# Write (and echo) the sorted selection as a grid with 16 emojis per row.
with open('selection-sortedLex.txt', 'w', encoding='utf-8') as grid_file:
    for start in range(0, len(emojis), 16):
        emoji_line = emojis[start:start + 16]
        row = ''.join(emoji_line) + '\n'
        grid_file.write(row)
        # `row` already ends with a newline, so the echoed rows are
        # separated by a blank line.
        print(row)

In [112]:
# Generate 128 samples of what a 128-bit key would look like.
# Each key is rendered as 16 emojis (presumably one emoji per byte, i.e. a
# 256-emoji alphabet -- confirm against the size of `emojis`).
import random

with open('selection-sample128.txt', 'w', encoding='utf-8') as f:
    for _ in range(128):
        # Draw WITH replacement: the bytes of a real key are independent, so
        # the same emoji can legitimately appear more than once in a key.
        # random.sample() would never show a repeat (making the preview look
        # more varied than real keys) and raises ValueError when fewer than
        # 16 emojis are loaded.
        sample = random.choices(emojis, k=16)
        key_line = ''.join(sample) + '\n'
        f.write(key_line)
        # `key_line` already ends with a newline, so printed keys are
        # separated by a blank line.
        print(key_line)

In [99]:
# Save selection to new emoji256 list.
# Rows hold 16 emojis each and follow the current order of `emojis`.
with open('emoji256-v1-sortedLex.txt', 'w', encoding='utf-8') as out_file:
    row_starts = range(0, len(emojis), 16)
    for row_start in row_starts:
        emoji_line = emojis[row_start:row_start + 16]
        row_text = ''.join(emoji_line) + '\n'
        out_file.write(row_text)
        # Echo the row; the trailing newline in `row_text` leaves a blank
        # line between printed rows.
        print(row_text)

In [114]:
# For writing to table.rs
# Emits the emojis as rows of 16 single-quoted, comma-separated literals
# (each row ends with a trailing comma) to emojis.txt, echoing every row.
from unicodedata import normalize

with open('emojis.txt', 'w', encoding='utf-8') as f:
    for i in range(0, len(emojis), 16):
        # Normalise the emojis of THIS row first, then quote them. The
        # previous version normalised a stale `emoji_line` left over from an
        # earlier cell (NameError on a fresh kernel) and then discarded the
        # result, so the written literals were never NFKC-normalised.
        emoji_line = [normalize('NFKC', emo) for emo in emojis[i:i+16]]
        line = ', '.join(f"'{emo}'" for emo in emoji_line) + ',\n'
        f.write(line)
        print(line)