In [1]:
#!/usr/bin/env python3
import sys
import unicodedata

In [2]:
import numpy as np
import pandas as pd
from collections import defaultdict, Counter
import pickle

In [3]:
import emoji as em

In [4]:
from tqdm.notebook import tqdm

In [6]:
with open('emoji2dict.pkl', 'rb') as file:
    emoji_dict = pickle.load(file)

In [7]:
for emoji in emoji_dict:
    emoji_dict[emoji]['emoji_char'] = emoji

In [8]:
emoji_dict[emoji]['emoji_char']

'🇦🇽'

In [9]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer


In [10]:
sentence_encoder = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')



In [11]:
vector_dict = {}

for emoji in tqdm(emoji_dict):
    vector_dict[emoji] = sentence_encoder.encode(
        emoji_dict[emoji]["Description"] + 
        ' '.join(str(s) for s in emoji_dict[emoji]["Semantic_Tags"])
    )

  0%|          | 0/5034 [00:00<?, ?it/s]

In [12]:
client = QdrantClient(":memory:")

In [13]:
client.recreate_collection(
    collection_name="EMOJIS",
    vectors_config=models.VectorParams(
        size=sentence_encoder.get_sentence_embedding_dimension(),
        distance=models.Distance.COSINE,
    ),
)


  client.recreate_collection(


True

In [14]:
client.upload_points(
    collection_name="EMOJIS",
    points=[
        models.PointStruct(
            id=idx, 
            vector=vector_dict[emoji].tolist(),
            payload=emoji_dict[emoji]
        )
        for idx, emoji in enumerate(emoji_dict)
    ],
)

In [15]:
emoji_dict['🧬']

{'Emoji': '🧬',
 'Description': 'This emoji represents DNA, the genetic material found in living organisms, symbolizing the blueprint of life and the basis of heredity and evolution.',
 'Semantic_Tags': ['DNA',
  'genetics',
  'heredity',
  'evolution',
  'biology',
  'molecule',
  'science'],
 'emoji_char': '🧬'}

In [59]:
def return_simialr_emojis(query):
    hits = client.search(
        collection_name="EMOJIS",
        query_vector=sentence_encoder.encode(query).tolist(),
        limit=40,
    )

    hit_emojis = set()

    for i, hit in enumerate(hits):
        emoji_char = hit.payload['emoji_char']
        score = hit.score

        _ord = ''
        for c in emoji_char:
            _ord += str(ord(c)) + ' '

        _spec = len(emoji_char) + 3
        unicode_desc = ' '.join(em.demojize(emoji_char).split('_'))[1:-1].upper()

        if emoji_char not in hit_emojis: 
            print(f"{i:<3} {emoji_char:<{_spec}}", end='')
            print(f"{score:<7.3f}", end= '')
            print(f"{unicode_desc:<55}")
            
        hit_emojis.add(emoji_char)

In [60]:
return_simialr_emojis(
    "DNA Biologie Labor Forschung"
)

0   🧬   0.484  DNA                                                    
1   👩🏾‍🔬   0.314  WOMAN SCIENTIST MEDIUM-DARK SKIN TONE                  
2   🅾   0.310  O BUTTON (BLOOD TYPE)                                  
3   🅾️   0.310  O BUTTON (BLOOD TYPE)                                  
4   👩‍🔬   0.306  WOMAN SCIENTIST                                        
5   🧪   0.299  TEST TUBE                                              
6   👩🏿‍🔬   0.298  WOMAN SCIENTIST DARK SKIN TONE                         
7   👨‍🔬   0.292  MAN SCIENTIST                                          
8   👩🏽‍🔬   0.289  WOMAN SCIENTIST MEDIUM SKIN TONE                       
9   🅱   0.289  B BUTTON (BLOOD TYPE)                                  
10  👨🏾‍🔬   0.289  MAN SCIENTIST MEDIUM-DARK SKIN TONE                    
11  🅰   0.287  A BUTTON (BLOOD TYPE)                                  
12  👩🏻‍🔬   0.285  WOMAN SCIENTIST LIGHT SKIN TONE                        
13  🧑🏽‍🔬   0.284  SCIENTIST MEDIUM SKIN TONE             

In [61]:
return_simialr_emojis(
    "جينات بيولوجيا بحث"
)

0   🧬   0.580  DNA                                                    
1   🦠   0.413  MICROBE                                                
2   👨‍🔬   0.338  MAN SCIENTIST                                          
3   👩🏾‍🔬   0.332  WOMAN SCIENTIST MEDIUM-DARK SKIN TONE                  
4   🧑‍🔬   0.325  SCIENTIST                                              
5   👩‍🔬   0.322  WOMAN SCIENTIST                                        
6   🧑🏽‍🔬   0.311  SCIENTIST MEDIUM SKIN TONE                             
7   🅾   0.304  O BUTTON (BLOOD TYPE)                                  
8   🅾️   0.304  O BUTTON (BLOOD TYPE)                                  
9   👩🏿‍🔬   0.303  WOMAN SCIENTIST DARK SKIN TONE                         
10  🧑🏾‍🔬   0.298  SCIENTIST MEDIUM-DARK SKIN TONE                        
11  🧑🏿‍🔬   0.297  SCIENTIST DARK SKIN TONE                               
12  🅱   0.295  B BUTTON (BLOOD TYPE)                                  
13  🧪   0.291  TEST TUBE                               

In [62]:
return_simialr_emojis("علوم الفلك  و الفضاء")

0   🪐   0.562  RINGED PLANET                                          
1   🌌   0.518  MILKY WAY                                              
2   👨‍🚀   0.499  MAN ASTRONAUT                                          
3   ☄   0.489  COMET                                                  
4   🔭   0.480  TELESCOPE                                              
5   ☄️   0.473  COMET                                                  
6   👩‍🚀   0.458  WOMAN ASTRONAUT                                        
7   🧑‍🚀   0.440  ASTRONAUT                                              
8   🧑🏻‍🚀   0.437  ASTRONAUT LIGHT SKIN TONE                              
9   🧑🏽‍🚀   0.422  ASTRONAUT MEDIUM SKIN TONE                             
10  👨🏽‍🚀   0.421  MAN ASTRONAUT MEDIUM SKIN TONE                         
11  🌃   0.420  NIGHT WITH STARS                                       
12  👩🏻‍🚀   0.417  WOMAN ASTRONAUT LIGHT SKIN TONE                        
13  🛰️   0.416  SATELLITE                                 

In [63]:
return_simialr_emojis("science astronomy space")

0   🪐   0.479  RINGED PLANET                                          
1   🔭   0.469  TELESCOPE                                              
2   🌌   0.429  MILKY WAY                                              
3   ☄️   0.402  COMET                                                  
4   ☄   0.397  COMET                                                  
5   👨‍🚀   0.378  MAN ASTRONAUT                                          
6   🔬   0.349  MICROSCOPE                                             
7   🧑‍🚀   0.344  ASTRONAUT                                              
8   🛰️   0.342  SATELLITE                                              
9   🛰   0.341  SATELLITE                                              
10  🧑🏻‍🚀   0.341  ASTRONAUT LIGHT SKIN TONE                              
11  👩🏿‍🚀   0.339  WOMAN ASTRONAUT DARK SKIN TONE                         
12  👩‍🚀   0.339  WOMAN ASTRONAUT                                        
13  🌃   0.332  NIGHT WITH STARS                                

In [64]:
return_simialr_emojis("snow winter sport")

0   🎿   0.668  SKIS                                                   
1   🏂🏽   0.658  SNOWBOARDER MEDIUM SKIN TONE                           
2   ⛷   0.630  SKIER                                                  
3   ⛷️   0.623  SKIER                                                  
4   🏂🏼   0.618  SNOWBOARDER MEDIUM-LIGHT SKIN TONE                     
5   🛷   0.615  SLED                                                   
6   🏂   0.606  SNOWBOARDER                                            
7   🏂🏿   0.602  SNOWBOARDER DARK SKIN TONE                             
8   🏂🏾   0.591  SNOWBOARDER MEDIUM-DARK SKIN TONE                      
9   🏂🏻   0.585  SNOWBOARDER LIGHT SKIN TONE                            
10  ☃️   0.581  SNOWMAN                                                
11  ☃   0.581  SNOWMAN                                                
12  ❄   0.570  SNOWFLAKE                                              
13  ⛸   0.555  ICE SKATE                                              

In [67]:
return_simialr_emojis("virus health pandemic")
# mexican asian french italina food

0   😷   0.297  FACE WITH MEDICAL MASK                                 
1   🤒   0.269  FACE WITH THERMOMETER                                  
2   🌪   0.262  TORNADO                                                
3   🌪️   0.262  TORNADO                                                
4   🌩   0.258  CLOUD WITH LIGHTNING                                   
5   🌩️   0.258  CLOUD WITH LIGHTNING                                   
6   🦠   0.247  MICROBE                                                
7   ⛈️   0.234  CLOUD WITH LIGHTNING AND RAIN                          
8   🦟   0.231  MOSQUITO                                               
9   ⛈   0.230  CLOUD WITH LIGHTNING AND RAIN                          
10  🤮   0.222  FACE VOMITING                                          
11  🌀   0.210  CYCLONE                                                
12  🐅   0.206  TIGER                                                  
13  🦹🏿   0.206  SUPERVILLAIN DARK SKIN TONE                            
14

In [68]:
return_simialr_emojis("new beginnings")
# ambition

0   🌅   0.443  SUNRISE                                                
1   🥚   0.429  EGG                                                    
2   🌄   0.416  SUNRISE OVER MOUNTAINS                                 
3   👶   0.415  BABY                                                   
4   🌚   0.415  NEW MOON FACE                                          
5   🌱   0.415  SEEDLING                                               
6   🌑   0.388  NEW MOON                                               
7   🔰   0.384  JAPANESE SYMBOL FOR BEGINNER                           
8   🐣   0.377  HATCHING CHICK                                         
9   🚪   0.359  DOOR                                                   
10  🆕   0.332  NEW BUTTON                                             
11  🌼   0.332  BLOSSOM                                                
12  👶🏽   0.329  BABY MEDIUM SKIN TONE                                  
13  👶🏼   0.325  BABY MEDIUM-LIGHT SKIN TONE                            
14  

In [70]:
return_simialr_emojis("Katze Haustier")
# ambition

0   🐈   0.634  CAT                                                    
1   😸   0.607  GRINNING CAT WITH SMILING EYES                         
2   🐅   0.605  TIGER                                                  
3   😼   0.602  CAT WITH WRY SMILE                                     
4   🐱   0.592  CAT FACE                                               
5   🐆   0.591  LEOPARD                                                
6   😺   0.584  GRINNING CAT                                           
7   🐈‍⬛   0.579  BLACK CAT                                              
8   😾   0.573  POUTING CAT                                            
9   😻   0.566  SMILING CAT WITH HEART-EYES                            
10  😽   0.548  KISSING CAT                                            
11  😹   0.537  CAT WITH TEARS OF JOY                                  
12  😿   0.513  CRYING CAT                                             
13  🐹   0.506  HAMSTER                                                
14  

In [25]:
return_simialr_emojis("ambition growth success")

🚀   0.356  ROCKET                                                 
📈   0.355  CHART INCREASING                                       
🌱   0.337  SEEDLING                                               
💹   0.336  CHART INCREASING WITH YEN                              
🏗️   0.307  BUILDING CONSTRUCTION                                  
💸   0.286  MONEY WITH WINGS                                       
🧗   0.284  PERSON CLIMBING                                        
🚧   0.281  CONSTRUCTION                                           
🎯   0.272  BULLSEYE                                               
↗️   0.271  UP-RIGHT ARROW                                         
🥅   0.271  GOAL NET                                               
🏗   0.270  BUILDING CONSTRUCTION                                  
↗   0.265  UP-RIGHT ARROW                                         
🌟   0.252  GLOWING STAR                                           
💯   0.244  HUNDRED POINTS                                   

In [26]:
return_simialr_emojis("natural disaster")

🌀   0.430  CYCLONE                                                
💥   0.398  COLLISION                                              
🌪️   0.380  TORNADO                                                
🌪   0.380  TORNADO                                                
🌋   0.370  VOLCANO                                                
💣   0.345  BOMB                                                   
🚒   0.328  FIRE ENGINE                                            
🧑‍🚒   0.316  FIREFIGHTER                                            
⛈️   0.300  CLOUD WITH LIGHTNING AND RAIN                          
⛈   0.300  CLOUD WITH LIGHTNING AND RAIN                          
🌩   0.298  CLOUD WITH LIGHTNING                                   
🌩️   0.298  CLOUD WITH LIGHTNING                                   
👨‍🚒   0.295  MAN FIREFIGHTER                                        
🏝   0.292  DESERT ISLAND                                          
🌧️   0.291  CLOUD WITH RAIN                            

In [72]:
return_simialr_emojis("Animals you can only find in America")
# 

0   🦬   0.476  BISON                                                  
1   🦥   0.465  SLOTH                                                  
2   🦙   0.437  LLAMA                                                  
3   🐃   0.428  WATER BUFFALO                                          
4   🦌   0.421  DEER                                                   
5   🐄   0.415  COW                                                    
6   🐂   0.412  OX                                                     
7   🐗   0.408  BOAR                                                   
8   🐺   0.398  WOLF                                                   
9   🐹   0.393  HAMSTER                                                
10  🐷   0.390  PIG FACE                                               
11  🫏   0.381  DONKEY                                                 
12  🐖   0.371  PIG                                                    
13  🐕   0.371  DOG                                                    
14  🦊 

In [28]:
emoji_dict['🥨']

{'Emoji': '🥨',
 'Description': 'This emoji represents a pretzel, a popular baked snack made from dough, often shaped into various forms and enjoyed as a tasty treat.',
 'Semantic_Tags': ['pretzel', 'snack', 'food', 'baked', 'dough', 'treat'],
 'emoji_char': '🥨'}

In [77]:
return_simialr_emojis("victory winning in life")

0   🥇   0.518  1ST PLACE MEDAL                                        
1   🏆   0.489  TROPHY                                                 
2   🥈   0.467  2ND PLACE MEDAL                                        
3   ✌️   0.400  VICTORY HAND                                           
4   💯   0.394  HUNDRED POINTS                                         
5   ✌🏾   0.393  VICTORY HAND MEDIUM-DARK SKIN TONE                     
6   🧍🏻‍♂   0.392  MAN STANDING LIGHT SKIN TONE                           
7   🥉   0.385  3RD PLACE MEDAL                                        
8   ✌   0.381  VICTORY HAND                                           
9   🏅   0.380  SPORTS MEDAL                                           
10  ✋   0.356  RAISED HAND                                            
11  🧍🏿‍♂   0.343  MAN STANDING DARK SKIN TONE                            
12  🧍🏿‍♂️   0.339  MAN STANDING DARK SKIN TONE                            
13  🌱   0.339  SEEDLING                                          

In [96]:
return_simialr_emojis("popular sport in Canada")

0   🥍   0.494  LACROSSE                                               
1   🏑   0.477  FIELD HOCKEY                                           
2   🏐   0.464  VOLLEYBALL                                             
3   🇨🇦   0.463  CANADA                                                 
4   🏀   0.461  BASKETBALL                                             
5   🏏   0.444  CRICKET GAME                                           
6   🥅   0.427  GOAL NET                                               
7   🏉   0.426  RUGBY FOOTBALL                                         
8   ⚾   0.415  BASEBALL                                               
9   🏓   0.406  PING PONG                                              
10  🏒   0.398  ICE HOCKEY                                             
11  🎾   0.398  TENNIS                                                 
12  🪃   0.385  BOOMERANG                                              
13  🤾‍♂️   0.383  MAN PLAYING HANDBALL                                   
14

In [88]:
emoji_dict["🪳"]

{'Emoji': '🪳',
 'Description': 'This emoji represents a cockroach, a small, flat insect known for its quick movement and often associated with unclean environments and pests.',
 'Semantic_Tags': ['cockroach', 'insect', 'pest', 'small', 'flat', 'unclean'],
 'emoji_char': '🪳'}

In [97]:
emoji_dict_with_vector = {
    emoji: {
        **emoji_dict[emoji],
        "vector": vector_dict[emoji]
    }
    for emoji in emoji_dict
}

In [98]:
emoji_dict_with_vector['🪳']

{'Emoji': '🪳',
 'Description': 'This emoji represents a cockroach, a small, flat insect known for its quick movement and often associated with unclean environments and pests.',
 'Semantic_Tags': ['cockroach', 'insect', 'pest', 'small', 'flat', 'unclean'],
 'emoji_char': '🪳',
 'vector': array([-1.05071940e-01,  1.54165015e-01,  1.55826941e-01, -1.97199374e-01,
        -2.28303745e-01, -2.87494004e-01,  1.98311001e-01, -2.23975793e-01,
        -1.06316775e-01,  5.51524051e-02,  2.55397260e-01,  4.91766706e-02,
         2.46094257e-01,  9.21425819e-02,  2.57114600e-02,  1.92957729e-01,
         8.77838805e-02,  3.20467085e-01,  2.51201093e-01, -3.97997089e-02,
         3.53387296e-01,  3.15393209e-01,  2.13830784e-01,  3.08327287e-01,
        -1.40273586e-01, -2.44938359e-01, -1.82471082e-01,  2.77699888e-01,
         8.33257064e-02, -8.43241066e-02,  4.49527115e-01,  2.60394722e-01,
         5.25033250e-02,  1.12449668e-01, -2.83867300e-01,  2.91387796e-01,
        -5.21503270e-01,  1.83

In [99]:
with open('emoji_dict_with_vectors.pkl', 'wb') as file:
    pickle.dump(emoji_dict_with_vector, file)


In [None]:
import pickle

with open('filename.pkl', 'rb') as file:
    loaded_object = pickle.load(file)
