In [1]:
import json
from difflib import SequenceMatcher

In [5]:
# Emoji keyword data comes from https://github.com/muan/emojilib
with open("emoji-en-US.json", encoding="utf8") as file:
    source = json.load(file)

# Flatten the {emoji: [name, word, ...]} mapping into (emoji, name, words)
# triples: the first list item is the name, the remainder are the extra words.
reshaped = [(emoji, values[0], values[1:]) for emoji, values in source.items()]

with open("emoji-en-US-reshaped.json", mode="w", encoding="utf8") as file:
    json.dump(reshaped, file)

In [43]:
# Load the reshaped emoji table written to "emoji-en-US-reshaped.json" above.
# Each entry is laid out as (emoji, name, words). NOTE: json.load returns JSON
# arrays as Python lists, so at runtime the entries are lists even though the
# annotation below describes them as tuples.
with open("emoji-en-US-reshaped.json", encoding="utf8") as file:
    source: list[tuple[str, str, list[str]]] = json.load(file)


def get_match_ratio(query: str, words: list[str], threshold: float = 0.7) -> tuple[float, str]:
    """Find the word in *words* that is most similar to *query*.

    Similarity is ``SequenceMatcher(None, query, word).ratio()``. Only ratios
    strictly greater than *threshold* (and greater than 0) count, and when
    several words reach the same best ratio the first one wins.

    Args:
        query: The search term.
        words: Candidate words to compare against *query*.
        threshold: Exclusive lower bound a ratio must exceed to be a match.

    Returns:
        ``(ratio, word)`` for the best match, or ``(0.0, "")`` when no word
        qualifies (including when *words* is empty).
    """
    match_ratio = 0.0
    match_word = ""

    for word in words:
        matcher = SequenceMatcher(None, query, word)
        # A candidate must beat both the threshold and the best ratio so far.
        floor = max(threshold, match_ratio)

        # real_quick_ratio() and quick_ratio() are cheap upper bounds on
        # ratio(); if either cannot beat the floor, the expensive ratio()
        # cannot either, so skip it without changing the result.
        if matcher.real_quick_ratio() <= floor or matcher.quick_ratio() <= floor:
            continue

        ratio = matcher.ratio()
        if ratio > floor:
            match_ratio = ratio
            match_word = word

            # 1.0 is the maximum possible ratio and later words must be
            # strictly better to replace it, so nothing can win from here.
            if match_ratio >= 1.0:
                break

    return match_ratio, match_word


def search_list(query, source):
    """Return the entries of *source* that fuzzily match *query*, best first.

    Each entry is an ``(emoji, name, words)`` sequence. The query is compared
    against the tokens of the name and against the extra words using
    ``get_match_ratio``; an exact hit on a name token is boosted to a ratio
    of 10.0 so that it outranks exact hits on the extra words.

    Args:
        query: The search term.
        source: Iterable of ``(emoji, name, words)`` entries.

    Returns:
        A list of ``[ratio, matched_word, emoji, name, words]`` lists, sorted
        by descending ratio and then by name. Entries without any match are
        omitted.
    """
    results = []

    for entry in source:
        # Names are tokenised on underscores ("kiwi_fruit"), but some names
        # are space-separated instead ("bell pepper"). Offer the
        # whitespace-separated pieces as additional candidates so those names
        # get the same direct-match boost. The underscore tokens stay first,
        # so anything that matched before still matches the same way.
        underscore_tokens = entry[1].split("_")
        space_tokens = [
            piece
            for token in underscore_tokens
            for piece in token.split()
            if piece != token
        ]
        name_ratio, name_word = get_match_ratio(query, underscore_tokens + space_tokens)

        # Boost direct name matches
        if name_ratio == 1.0:
            name_ratio = 10.0

        extra_ratio, extra_word = get_match_ratio(query, entry[2])

        if extra_ratio > name_ratio:
            results.append([extra_ratio, extra_word, *entry])
        elif name_ratio > 0:
            results.append([name_ratio, name_word, *entry])

    # Highest ratio first, ties broken by name. One sort with a tuple key gives
    # the same order as sorting by name and then stably by reverse ratio.
    results.sort(key=lambda x: (-x[0], x[3]))
    return results


# Demo query: find every emoji matching "fruit", best match first.
results = search_list("fruit", source)
for result in results:
    # Each row is [ratio, matched word, emoji, name, words]; print() writes
    # the items separated by spaces.
    print(*result)

10.0 fruit 🥝 kiwi_fruit ['fruit', 'kiwifruit', 'gooseberry', 'chinese', 'kiwi', 'food']
1.0 fruit 🥑 avocado ['avocado', 'fruit', 'food']
1.0 fruit 🍌 banana ['fruit', 'plant', 'plantain', 'banana', 'monkey', 'food']
1.0 fruit 🫑 bell pepper ['plant', 'fruit', 'bell pepper', 'vegetable', 'capsicum']
1.0 fruit 🫐 blueberries ['fruit', 'blueberries', 'bilberry', 'blueberry', 'berry', 'blue']
1.0 fruit 🥦 broccoli ['broccoli', 'fruit', 'wild', 'cabbage', 'vegetable', 'food']
1.0 fruit 🍒 cherries ['fruit', 'berries', 'plant', 'cherry', 'cherries', 'wild', 'red', 'food']
1.0 fruit 🥥 coconut ['coconut', 'fruit', 'piña', 'palm', 'cocoanut', 'nature', 'colada', 'food']
1.0 fruit 🥒 cucumber ['fruit', 'pickle', 'cucumber', 'gherkin', 'vegetable', 'food']
1.0 fruit 🍇 grapes ['fruit', 'plant', 'grape', 'wine', 'grapes', 'food']
1.0 fruit 🍏 green_apple ['fruit', 'plant', 'golden', 'delicious', 'apple', 'granny', 'nature', 'smith', 'green']
1.0 fruit 🍋 lemon ['fruit', 'plant', 'lemon', 'citrus', 'nature'